
Commit 001a589

Davies Liu (davies) authored and committed
[SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time
## What changes were proposed in this pull request?

Internally, we use an Int to represent a date (the number of days since 1970-01-01). When we convert that into a unix timestamp (milliseconds since the epoch in UTC), we look up the timezone offset using local millis (the milliseconds since 1970-01-01 in that timezone), but TimeZone.getOffset() expects a unix timestamp, so the result can be off by one hour depending on whether Daylight Saving Time (DST) is in effect. This PR changes the lookup to use a best-effort approximation of the posix timestamp. Around a DST transition, some local times are not defined (for example, 2016-03-13 02:00:00 PST) and others map to multiple valid UTC instants (for example, 2016-11-06 01:00:00 PST), so a best-effort approximation is the most we can do, and it should be enough in practice.

## How was this patch tested?

Added regression tests.

Author: Davies Liu <[email protected]>

Closes #13652 from davies/fix_timezone.
1 parent ce3b98b commit 001a589
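For illustration only (not part of the commit): the sketch below contrasts the old lookup, which passes local millis straight to `TimeZone.getOffset()`, with the refinement loop the patch introduces (the `java.sql.Timestamp` fallback is omitted for brevity), for the wall-clock time 2016-03-13 03:00:00 in the PST zone, just after the spring-forward transition. The expected result matches the new test case `test("2016-03-13 03:00:00", "PST", "2016-03-13 10:00:00.0")`.

```scala
import java.util.TimeZone

// Standalone sketch: shows why feeding "local millis" to TimeZone.getOffset(),
// which expects a UTC instant, is off by one hour right after the
// 2016-03-13 02:00 spring-forward in the PST zone.
object DstOffsetDemo {
  def main(args: Array[String]): Unit = {
    val tz = TimeZone.getTimeZone("PST")  // alias of America/Los_Angeles, observes DST
    // Wall-clock 2016-03-13 03:00:00 encoded as millis since 1970-01-01 00:00:00
    // as if the wall clock were UTC ("local millis").
    val millisLocal = 1457838000000L

    // Old behaviour: treat local millis as a UTC instant. That instant is still
    // 2016-03-12 19:00 PST, so the returned offset is -8h instead of -7h.
    val naiveOffset = tz.getOffset(millisLocal)
    println(millisLocal - naiveOffset)  // 1457866800000 = 2016-03-13 11:00:00 UTC (wrong)

    // New behaviour (simplified, without the java.sql.Timestamp fallback):
    // start from the raw offset and refine it against the implied UTC instant.
    var guess: Long = tz.getRawOffset              // -28800000 (-8h)
    val offset = tz.getOffset(millisLocal - guess) // -25200000 (-7h)
    if (offset != guess) {
      guess = tz.getOffset(millisLocal - offset).toLong
    }
    println(millisLocal - guess)  // 1457863200000 = 2016-03-13 10:00:00 UTC (correct)
  }
}
```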

File tree: 4 files changed, +129 −4 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 48 additions & 3 deletions
```diff
@@ -100,8 +100,8 @@ object DateTimeUtils {
 
   // reverse of millisToDays
   def daysToMillis(days: SQLDate): Long = {
-    val millisUtc = days.toLong * MILLIS_PER_DAY
-    millisUtc - threadLocalLocalTimeZone.get().getOffset(millisUtc)
+    val millisLocal = days.toLong * MILLIS_PER_DAY
+    millisLocal - getOffsetFromLocalMillis(millisLocal, threadLocalLocalTimeZone.get())
   }
 
   def dateToString(days: SQLDate): String =
@@ -850,6 +850,41 @@
     }
   }
 
+  /**
+   * Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in given timezone.
+   */
+  private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = {
+    var guess = tz.getRawOffset
+    // the actual offset should be calculated based on milliseconds in UTC
+    val offset = tz.getOffset(millisLocal - guess)
+    if (offset != guess) {
+      guess = tz.getOffset(millisLocal - offset)
+      if (guess != offset) {
+        // fallback to do the reverse lookup using java.sql.Timestamp
+        // this should only happen near the start or end of DST
+        val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt
+        val year = getYear(days)
+        val month = getMonth(days)
+        val day = getDayOfMonth(days)
+
+        var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt
+        if (millisOfDay < 0) {
+          millisOfDay += MILLIS_PER_DAY.toInt
+        }
+        val seconds = (millisOfDay / 1000L).toInt
+        val hh = seconds / 3600
+        val mm = seconds / 60 % 60
+        val ss = seconds % 60
+        val nano = millisOfDay % 1000 * 1000000
+
+        // create a Timestamp to get the unix timestamp (in UTC)
+        val timestamp = new Timestamp(year - 1900, month - 1, day, hh, mm, ss, nano)
+        guess = (millisLocal - timestamp.getTime).toInt
+      }
+    }
+    guess
+  }
+
   /**
    * Returns a timestamp of given timezone from utc timestamp, with the same string
    * representation in their timezone.
@@ -866,7 +901,17 @@
    */
   def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
     val tz = TimeZone.getTimeZone(timeZone)
-    val offset = tz.getOffset(time / 1000L)
+    val offset = getOffsetFromLocalMillis(time / 1000L, tz)
     time - offset * 1000L
   }
+
+  /**
+   * Re-initialize the current thread's thread locals. Exposed for testing.
+   */
+  private[util] def resetThreadLocals(): Unit = {
+    threadLocalGmtCalendar.remove()
+    threadLocalLocalTimeZone.remove()
+    threadLocalTimestampFormat.remove()
+    threadLocalDateFormat.remove()
+  }
 }
```
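A note on the `java.sql.Timestamp` fallback (illustration only, not part of the diff): when the two-step offset refinement does not converge, which happens for wall-clock times that do not exist, such as 2016-03-13 02:00:00 in the PST zone, the patch reconstructs the wall-clock fields and lets the legacy `Timestamp` constructor resolve them leniently in the JVM default timezone. A minimal sketch, assuming the default timezone has been set to PST (as the tests arrange via `DateTimeTestUtils.withDefaultTimeZone`):

```scala
import java.sql.Timestamp
import java.util.TimeZone

object FallbackSketch {
  def main(args: Array[String]): Unit = {
    // Assumption for the sketch: the JVM default timezone is PST.
    TimeZone.setDefault(TimeZone.getTimeZone("PST"))

    // Wall-clock 2016-03-13 02:00:00 does not exist in PST (clocks jump 02:00 -> 03:00).
    // "Local millis": the wall-clock fields read as if they were UTC.
    val millisLocal = 1457834400000L

    // The deprecated constructor takes (year - 1900, month - 1, day, hh, mm, ss, nanos)
    // and resolves the fields leniently in the default timezone.
    val ts = new Timestamp(2016 - 1900, 3 - 1, 13, 2, 0, 0, 0)

    // The difference between local millis and the resolved instant is the offset guess.
    val guess = millisLocal - ts.getTime
    println(millisLocal - guess)  // the UTC instant used for the conversion
    // The new test case expects toUTCTime("2016-03-13 02:00:00", "PST") to yield
    // "2016-03-13 10:00:00.0", i.e. 1457863200000 here.
  }
}
```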

sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
  *
  * Please use the singleton [[DataTypes.DateType]].
  *
- * Internally, this is represented as the number of days from epoch (1970-01-01 00:00:00 UTC).
+ * Internally, this is represented as the number of days from 1970-01-01.
  */
 @DeveloperApi
 class DateType private() extends AtomicType {
```

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala

Lines changed: 40 additions & 0 deletions

```diff
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util
+
+import java.util.TimeZone
+
+/**
+ * Helper functions for testing date and time functionality.
+ */
+object DateTimeTestUtils {
+
+  val ALL_TIMEZONES: Seq[TimeZone] = TimeZone.getAvailableIDs.toSeq.map(TimeZone.getTimeZone)
+
+  def withDefaultTimeZone[T](newDefaultTimeZone: TimeZone)(block: => T): T = {
+    val originalDefaultTimeZone = TimeZone.getDefault
+    try {
+      DateTimeUtils.resetThreadLocals()
+      TimeZone.setDefault(newDefaultTimeZone)
+      block
+    } finally {
+      TimeZone.setDefault(originalDefaultTimeZone)
+      DateTimeUtils.resetThreadLocals()
+    }
+  }
+}
```
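For context (not part of the diff): a minimal sketch of how this helper is meant to be used from Spark's catalyst test sources, where `DateTimeTestUtils` is visible. The round-trip assertion mirrors the new regression test below.

```scala
import java.util.TimeZone

import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils}

// Pin the JVM default timezone for a block and restore it afterwards; the helper
// also clears DateTimeUtils' cached thread locals so the change is picked up.
object WithDefaultTimeZoneExample {
  def main(args: Array[String]): Unit = {
    DateTimeTestUtils.withDefaultTimeZone(TimeZone.getTimeZone("PST")) {
      val day = 16873  // 2016-03-13 expressed as days since 1970-01-01
      // daysToMillis/millisToDays now round-trip even on the spring-forward day.
      assert(DateTimeUtils.millisToDays(DateTimeUtils.daysToMillis(day)) == day)
    }
  }
}
```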

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 40 additions & 0 deletions
```diff
@@ -492,6 +492,13 @@ class DateTimeUtilsSuite extends SparkFunSuite {
     test("2011-12-25 09:00:00.123456", "JST", "2011-12-25 18:00:00.123456")
     test("2011-12-25 09:00:00.123456", "PST", "2011-12-25 01:00:00.123456")
     test("2011-12-25 09:00:00.123456", "Asia/Shanghai", "2011-12-25 17:00:00.123456")
+
+    // Daylight Saving Time
+    test("2016-03-13 09:59:59.0", "PST", "2016-03-13 01:59:59.0")
+    test("2016-03-13 10:00:00.0", "PST", "2016-03-13 03:00:00.0")
+    test("2016-11-06 08:59:59.0", "PST", "2016-11-06 01:59:59.0")
+    test("2016-11-06 09:00:00.0", "PST", "2016-11-06 01:00:00.0")
+    test("2016-11-06 10:00:00.0", "PST", "2016-11-06 02:00:00.0")
   }
 
   test("to UTC timestamp") {
@@ -503,5 +510,38 @@
     test("2011-12-25 18:00:00.123456", "JST", "2011-12-25 09:00:00.123456")
     test("2011-12-25 01:00:00.123456", "PST", "2011-12-25 09:00:00.123456")
     test("2011-12-25 17:00:00.123456", "Asia/Shanghai", "2011-12-25 09:00:00.123456")
+
+    // Daylight Saving Time
+    test("2016-03-13 01:59:59", "PST", "2016-03-13 09:59:59.0")
+    // 2016-03-13 02:00:00 PST does not exists
+    test("2016-03-13 02:00:00", "PST", "2016-03-13 10:00:00.0")
+    test("2016-03-13 03:00:00", "PST", "2016-03-13 10:00:00.0")
+    test("2016-11-06 00:59:59", "PST", "2016-11-06 07:59:59.0")
+    // 2016-11-06 01:00:00 PST could be 2016-11-06 08:00:00 UTC or 2016-11-06 09:00:00 UTC
+    test("2016-11-06 01:00:00", "PST", "2016-11-06 09:00:00.0")
+    test("2016-11-06 01:59:59", "PST", "2016-11-06 09:59:59.0")
+    test("2016-11-06 02:00:00", "PST", "2016-11-06 10:00:00.0")
+  }
+
+  test("daysToMillis and millisToDays") {
+    // There are some days are skipped entirely in some timezone, skip them here.
+    val skipped_days = Map[String, Int](
+      "Kwajalein" -> 8632,
+      "Pacific/Apia" -> 15338,
+      "Pacific/Enderbury" -> 9131,
+      "Pacific/Fakaofo" -> 15338,
+      "Pacific/Kiritimati" -> 9131,
+      "Pacific/Kwajalein" -> 8632,
+      "MIT" -> 15338)
+    for (tz <- DateTimeTestUtils.ALL_TIMEZONES) {
+      DateTimeTestUtils.withDefaultTimeZone(tz) {
+        val skipped = skipped_days.getOrElse(tz.getID, Int.MinValue)
+        (-20000 to 20000).foreach { d =>
+          if (d != skipped) {
+            assert(millisToDays(daysToMillis(d)) === d)
+          }
+        }
+      }
+    }
   }
 }
```
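Aside (not part of the commit): the values in `skipped_days` are epoch days, i.e. days since 1970-01-01. A quick sketch (e.g. pasted into a Scala REPL) decodes them into calendar dates; all of these zones jumped across the International Date Line around the decoded dates, so the round trip is skipped for them rather than asserted.

```scala
import java.time.LocalDate

// Decode the epoch-day values used in skipped_days into calendar dates.
Seq(8632L, 9131L, 15338L).foreach { d =>
  println(s"$d -> ${LocalDate.ofEpochDay(d)}")
}
// 8632  -> 1993-08-20
// 9131  -> 1995-01-01
// 15338 -> 2011-12-30
```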
