File tree Expand file tree Collapse file tree 1 file changed +15
-1
lines changed
sql/core/src/main/scala/org/apache/spark/sql/json Expand file tree Collapse file tree 1 file changed +15
-1
lines changed Original file line number Diff line number Diff line change @@ -38,6 +38,12 @@ private[sql] object JsonRDD2 extends Logging {
3838 parseJson(json, schema, columnNameOfCorruptRecords)
3939 }
4040
41+ /**
42+ * Infer the type of a collection of JSON records in three stages:
43+ * 1. Infer the type of each record
44+ * 2. Merge types by choosing the lowest type necessary to cover fields with the same key
45+ * 3. Replace any remaining null fields with string, the top type
46+ */
4147 def inferSchema (
4248 json : RDD [String ],
4349 samplingRatio : Double = 1.0 ,
@@ -79,7 +85,15 @@ private[sql] object JsonRDD2 extends Logging {
7985 parser.nextToken()
8086 inferField(parser)
8187
82- case VALUE_STRING if parser.getTextLength < 1 => NullType
88+ case VALUE_STRING if parser.getTextLength < 1 =>
89+ // Zero-length strings and nulls have special handling to deal
90+ // with JSON generators that do not distinguish between the two.
91+ // To accurately infer types for empty strings that are really
92+ // meant to represent nulls, we treat the two as equivalent here
93+ // but defer treating null fields as strings until all the
94+ // record fields' types have been combined.
95+ NullType
96+
8397 case VALUE_STRING => StringType
8498 case START_OBJECT =>
8599 val builder = Seq .newBuilder[StructField ]
You can’t perform that action at this time.
0 commit comments