Skip to content

Commit 82e98f1

Browse files
Davies Liu authored and rxin committed
[SPARK-16078][SQL] Backport: from_utc_timestamp/to_utc_timestamp should not depend on local timezone
## What changes were proposed in this pull request?

Back-port of #13784 to `branch-1.6`.

## How was this patch tested?

Existing tests.

Author: Davies Liu <[email protected]>

Closes #15554 from srowen/SPARK-16078.
1 parent b95ac0d commit 82e98f1

File tree

3 files changed

+74
-36
lines changed

3 files changed

+74
-36
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -658,16 +658,17 @@ case class FromUTCTimestamp(left: Expression, right: Expression)
658658
""".stripMargin
659659
} else {
660660
val tzTerm = ctx.freshName("tz")
661+
val utcTerm = ctx.freshName("utc")
661662
val tzClass = classOf[TimeZone].getName
662663
ctx.addMutableState(tzClass, tzTerm, s"""$tzTerm = $tzClass.getTimeZone("$tz");""")
664+
ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = $tzClass.getTimeZone("UTC");""")
663665
val eval = left.gen(ctx)
664666
s"""
665667
|${eval.code}
666668
|boolean ${ev.isNull} = ${eval.isNull};
667669
|long ${ev.value} = 0;
668670
|if (!${ev.isNull}) {
669-
| ${ev.value} = ${eval.value} +
670-
| ${tzTerm}.getOffset(${eval.value} / 1000) * 1000L;
671+
| ${ev.value} = $dtu.convertTz(${eval.value}, $utcTerm, $tzTerm);
671672
|}
672673
""".stripMargin
673674
}
@@ -783,16 +784,17 @@ case class ToUTCTimestamp(left: Expression, right: Expression)
783784
""".stripMargin
784785
} else {
785786
val tzTerm = ctx.freshName("tz")
787+
val utcTerm = ctx.freshName("utc")
786788
val tzClass = classOf[TimeZone].getName
787789
ctx.addMutableState(tzClass, tzTerm, s"""$tzTerm = $tzClass.getTimeZone("$tz");""")
790+
ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = $tzClass.getTimeZone("UTC");""")
788791
val eval = left.gen(ctx)
789792
s"""
790793
|${eval.code}
791794
|boolean ${ev.isNull} = ${eval.isNull};
792795
|long ${ev.value} = 0;
793796
|if (!${ev.isNull}) {
794-
| ${ev.value} = ${eval.value} -
795-
| ${tzTerm}.getOffset(${eval.value} / 1000) * 1000L;
797+
| ${ev.value} = $dtu.convertTz(${eval.value}, $tzTerm, $utcTerm);
796798
|}
797799
""".stripMargin
798800
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ object DateTimeUtils {
5555
// this is year -17999, calculation: 50 * daysIn400Year
5656
final val YearZero = -17999
5757
final val toYearZero = to2001 + 7304850
58+
final val TimeZoneGMT = TimeZone.getTimeZone("GMT")
5859

5960
@transient lazy val defaultTimeZone = TimeZone.getDefault
6061

@@ -854,24 +855,46 @@ object DateTimeUtils {
854855
guess
855856
}
856857

858+
/**
859+
* Convert the timestamp `ts` from one timezone to another.
860+
*
861+
* TODO: Because of DST, the conversion between UTC and human time is not an exact one-to-one
862+
* mapping, so the conversion here may return a wrong result; we should make the timestamp
863+
* timezone-aware.
864+
*/
865+
def convertTz(ts: SQLTimestamp, fromZone: TimeZone, toZone: TimeZone): SQLTimestamp = {
866+
// We always use local timezone to parse or format a timestamp
867+
val localZone = threadLocalLocalTimeZone.get()
868+
val utcTs = if (fromZone.getID == localZone.getID) {
869+
ts
870+
} else {
871+
// Get the human time using the local time zone; that human time is actually in fromZone.
872+
val localTs = ts + localZone.getOffset(ts / 1000L) * 1000L // in fromZone
873+
localTs - getOffsetFromLocalMillis(localTs / 1000L, fromZone) * 1000L
874+
}
875+
if (toZone.getID == localZone.getID) {
876+
utcTs
877+
} else {
878+
val localTs2 = utcTs + toZone.getOffset(utcTs / 1000L) * 1000L // in toZone
879+
// Treat it as a local-time-zone value and convert to UTC (so we get the expected human time back)
880+
localTs2 - getOffsetFromLocalMillis(localTs2 / 1000L, localZone) * 1000L
881+
}
882+
}
883+
857884
/**
858885
* Returns a timestamp of given timezone from utc timestamp, with the same string
859886
* representation in their timezone.
860887
*/
861888
def fromUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
862-
val tz = TimeZone.getTimeZone(timeZone)
863-
val offset = tz.getOffset(time / 1000L)
864-
time + offset * 1000L
889+
convertTz(time, TimeZoneGMT, TimeZone.getTimeZone(timeZone))
865890
}
866891

867892
/**
868893
* Returns a utc timestamp from a given timestamp from a given timezone, with the same
869894
* string representation in their timezone.
870895
*/
871896
def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
872-
val tz = TimeZone.getTimeZone(timeZone)
873-
val offset = getOffsetFromLocalMillis(time / 1000L, tz)
874-
time - offset * 1000L
897+
convertTz(time, TimeZone.getTimeZone(timeZone), TimeZoneGMT)
875898
}
876899

877900
/**

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -472,39 +472,52 @@ class DateTimeUtilsSuite extends SparkFunSuite {
472472
assert(toJavaTimestamp(fromUTCTime(fromJavaTimestamp(Timestamp.valueOf(utc)), tz)).toString
473473
=== expected)
474474
}
475-
test("2011-12-25 09:00:00.123456", "UTC", "2011-12-25 09:00:00.123456")
476-
test("2011-12-25 09:00:00.123456", "JST", "2011-12-25 18:00:00.123456")
477-
test("2011-12-25 09:00:00.123456", "PST", "2011-12-25 01:00:00.123456")
478-
test("2011-12-25 09:00:00.123456", "Asia/Shanghai", "2011-12-25 17:00:00.123456")
479-
480-
// Daylight Saving Time
481-
test("2016-03-13 09:59:59.0", "PST", "2016-03-13 01:59:59.0")
482-
test("2016-03-13 10:00:00.0", "PST", "2016-03-13 03:00:00.0")
483-
test("2016-11-06 08:59:59.0", "PST", "2016-11-06 01:59:59.0")
484-
test("2016-11-06 09:00:00.0", "PST", "2016-11-06 01:00:00.0")
485-
test("2016-11-06 10:00:00.0", "PST", "2016-11-06 02:00:00.0")
475+
for (tz <- DateTimeTestUtils.ALL_TIMEZONES) {
476+
DateTimeTestUtils.withDefaultTimeZone(tz) {
477+
test("2011-12-25 09:00:00.123456", "UTC", "2011-12-25 09:00:00.123456")
478+
test("2011-12-25 09:00:00.123456", "JST", "2011-12-25 18:00:00.123456")
479+
test("2011-12-25 09:00:00.123456", "PST", "2011-12-25 01:00:00.123456")
480+
test("2011-12-25 09:00:00.123456", "Asia/Shanghai", "2011-12-25 17:00:00.123456")
481+
}
482+
}
483+
484+
DateTimeTestUtils.withDefaultTimeZone(TimeZone.getTimeZone("PST")) {
485+
// Daylight Saving Time
486+
test("2016-03-13 09:59:59.0", "PST", "2016-03-13 01:59:59.0")
487+
test("2016-03-13 10:00:00.0", "PST", "2016-03-13 03:00:00.0")
488+
test("2016-11-06 08:59:59.0", "PST", "2016-11-06 01:59:59.0")
489+
test("2016-11-06 09:00:00.0", "PST", "2016-11-06 01:00:00.0")
490+
test("2016-11-06 10:00:00.0", "PST", "2016-11-06 02:00:00.0")
491+
}
486492
}
487493

488494
test("to UTC timestamp") {
489495
def test(utc: String, tz: String, expected: String): Unit = {
490496
assert(toJavaTimestamp(toUTCTime(fromJavaTimestamp(Timestamp.valueOf(utc)), tz)).toString
491497
=== expected)
492498
}
493-
test("2011-12-25 09:00:00.123456", "UTC", "2011-12-25 09:00:00.123456")
494-
test("2011-12-25 18:00:00.123456", "JST", "2011-12-25 09:00:00.123456")
495-
test("2011-12-25 01:00:00.123456", "PST", "2011-12-25 09:00:00.123456")
496-
test("2011-12-25 17:00:00.123456", "Asia/Shanghai", "2011-12-25 09:00:00.123456")
497-
498-
// Daylight Saving Time
499-
test("2016-03-13 01:59:59", "PST", "2016-03-13 09:59:59.0")
500-
// 2016-03-13 02:00:00 PST does not exist
501-
test("2016-03-13 02:00:00", "PST", "2016-03-13 10:00:00.0")
502-
test("2016-03-13 03:00:00", "PST", "2016-03-13 10:00:00.0")
503-
test("2016-11-06 00:59:59", "PST", "2016-11-06 07:59:59.0")
504-
// 2016-11-06 01:00:00 PST could be 2016-11-06 08:00:00 UTC or 2016-11-06 09:00:00 UTC
505-
test("2016-11-06 01:00:00", "PST", "2016-11-06 09:00:00.0")
506-
test("2016-11-06 01:59:59", "PST", "2016-11-06 09:59:59.0")
507-
test("2016-11-06 02:00:00", "PST", "2016-11-06 10:00:00.0")
499+
500+
for (tz <- DateTimeTestUtils.ALL_TIMEZONES) {
501+
DateTimeTestUtils.withDefaultTimeZone(tz) {
502+
test("2011-12-25 09:00:00.123456", "UTC", "2011-12-25 09:00:00.123456")
503+
test("2011-12-25 18:00:00.123456", "JST", "2011-12-25 09:00:00.123456")
504+
test("2011-12-25 01:00:00.123456", "PST", "2011-12-25 09:00:00.123456")
505+
test("2011-12-25 17:00:00.123456", "Asia/Shanghai", "2011-12-25 09:00:00.123456")
506+
}
507+
}
508+
509+
DateTimeTestUtils.withDefaultTimeZone(TimeZone.getTimeZone("PST")) {
510+
// Daylight Saving Time
511+
test("2016-03-13 01:59:59", "PST", "2016-03-13 09:59:59.0")
512+
// 2016-03-13 02:00:00 PST does not exist
513+
test("2016-03-13 02:00:00", "PST", "2016-03-13 10:00:00.0")
514+
test("2016-03-13 03:00:00", "PST", "2016-03-13 10:00:00.0")
515+
test("2016-11-06 00:59:59", "PST", "2016-11-06 07:59:59.0")
516+
// 2016-11-06 01:00:00 PST could be 2016-11-06 08:00:00 UTC or 2016-11-06 09:00:00 UTC
517+
test("2016-11-06 01:00:00", "PST", "2016-11-06 09:00:00.0")
518+
test("2016-11-06 01:59:59", "PST", "2016-11-06 09:59:59.0")
519+
test("2016-11-06 02:00:00", "PST", "2016-11-06 10:00:00.0")
520+
}
508521
}
509522

510523
test("daysToMillis and millisToDays") {

0 commit comments

Comments
 (0)