@@ -793,6 +793,7 @@ private[hive] case class MetastoreRelation
793793
794794  @ transient override  lazy  val  statistics :  Statistics  =  Statistics (
795795    sizeInBytes =  {
796+       val  hiveContext  =  sqlContext.asInstanceOf [HiveContext ]
796797      val  totalSize  =  hiveQlTable.getParameters.get(StatsSetupConst .TOTAL_SIZE )
797798      val  rawDataSize  =  hiveQlTable.getParameters.get(StatsSetupConst .RAW_DATA_SIZE )
798799      //  TODO: check if this estimate is valid for tables after partition pruning.
@@ -801,13 +802,20 @@ private[hive] case class MetastoreRelation
801802      //  alternative would be going through Hadoop's FileSystem API, which can be expensive if a lot
802803      //  of RPCs are involved.  Besides `totalSize`, there are also `numFiles`, `numRows`,
803804      //  `rawDataSize` keys (see StatsSetupConst in Hive) that we can look at in the future.
804-       BigInt (
805-         //  When table is external,`totalSize` is always zero, which will influence join strategy
806-         //  so when `totalSize` is zero, use `rawDataSize` instead
807-         //  if the size is still less than zero, we use default size
808-         Option (totalSize).map(_.toLong).filter(_ >  0 )
809-           .getOrElse(Option (rawDataSize).map(_.toLong).filter(_ >  0 )
810-           .getOrElse(sqlContext.conf.defaultSizeInBytes)))
805+ 
806+       //  When the table is external, `totalSize` is always zero, which would skew the join strategy,
807+       //  so when `totalSize` is not positive we fall back to `rawDataSize`, then to the file sizes
808+       //  reported by `hadoopFileSelector` (if any), and finally to the default size.
809+       val  sizeEst  =  Option (totalSize).map(_.toLong).filter(_ >  0 )
810+         .getOrElse(Option (rawDataSize).map(_.toLong).filter(_ >  0 )
811+         .getOrElse(hiveContext.hadoopFileSelector.flatMap(
812+           _.getFilesSizeInBytes(
813+             hiveQlTable.getTableName,
814+             hiveQlTable.getPath.getFileSystem(hiveContext.hiveconf),
815+             hiveQlTable.getPath)).filter(_ >  0 )
816+         .getOrElse(sqlContext.conf.defaultSizeInBytes)))
817+       logDebug(s " Size estimation for table  ${hiveQlTable.getTableName}:  $sizeEst bytes " )
818+       BigInt (sizeEst)
811819    }
812820  )
813821
0 commit comments