Skip to content

Commit 41efd20

Browse files
Davies Liudavies
authored andcommitted
[SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time
## What changes were proposed in this pull request? Internally, we use Int to represent a date (the days since 1970-01-01), when we convert that into unix timestamp (milli-seconds since epoch in UTC), we get the offset of a timezone using local millis (the milli-seconds since 1970-01-01 in a timezone), but TimeZone.getOffset() expect unix timestamp, the result could be off by one hour (in Daylight Saving Time (DST) or not). This PR change to use best effort approximate of posix timestamp to lookup the offset. In the event of changing of DST, Some time is not defined (for example, 2016-03-13 02:00:00 PST), or could lead to multiple valid result in UTC (for example, 2016-11-06 01:00:00), this best effort approximate should be enough in practice. ## How was this patch tested? Added regression tests. Author: Davies Liu <[email protected]> Closes #13652 from davies/fix_timezone. (cherry picked from commit 001a589) Signed-off-by: Davies Liu <[email protected]>
1 parent 3f1d730 commit 41efd20

File tree

4 files changed

+129
-4
lines changed

4 files changed

+129
-4
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ object DateTimeUtils {
8989

9090
// reverse of millisToDays
9191
def daysToMillis(days: SQLDate): Long = {
92-
val millisUtc = days.toLong * MILLIS_PER_DAY
93-
millisUtc - threadLocalLocalTimeZone.get().getOffset(millisUtc)
92+
val millisLocal = days.toLong * MILLIS_PER_DAY
93+
millisLocal - getOffsetFromLocalMillis(millisLocal, threadLocalLocalTimeZone.get())
9494
}
9595

9696
def dateToString(days: SQLDate): String =
@@ -819,6 +819,41 @@ object DateTimeUtils {
819819
}
820820
}
821821

822+
/**
823+
* Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in given timezone.
824+
*/
825+
private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = {
826+
var guess = tz.getRawOffset
827+
// the actual offset should be calculated based on milliseconds in UTC
828+
val offset = tz.getOffset(millisLocal - guess)
829+
if (offset != guess) {
830+
guess = tz.getOffset(millisLocal - offset)
831+
if (guess != offset) {
832+
// fallback to do the reverse lookup using java.sql.Timestamp
833+
// this should only happen near the start or end of DST
834+
val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt
835+
val year = getYear(days)
836+
val month = getMonth(days)
837+
val day = getDayOfMonth(days)
838+
839+
var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt
840+
if (millisOfDay < 0) {
841+
millisOfDay += MILLIS_PER_DAY.toInt
842+
}
843+
val seconds = (millisOfDay / 1000L).toInt
844+
val hh = seconds / 3600
845+
val mm = seconds / 60 % 60
846+
val ss = seconds % 60
847+
val nano = millisOfDay % 1000 * 1000000
848+
849+
// create a Timestamp to get the unix timestamp (in UTC)
850+
val timestamp = new Timestamp(year - 1900, month - 1, day, hh, mm, ss, nano)
851+
guess = (millisLocal - timestamp.getTime).toInt
852+
}
853+
}
854+
guess
855+
}
856+
822857
/**
823858
* Returns a timestamp of given timezone from utc timestamp, with the same string
824859
* representation in their timezone.
@@ -835,7 +870,17 @@ object DateTimeUtils {
835870
*/
836871
def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
837872
val tz = TimeZone.getTimeZone(timeZone)
838-
val offset = tz.getOffset(time / 1000L)
873+
val offset = getOffsetFromLocalMillis(time / 1000L, tz)
839874
time - offset * 1000L
840875
}
876+
877+
/**
878+
* Re-initialize the current thread's thread locals. Exposed for testing.
879+
*/
880+
private[util] def resetThreadLocals(): Unit = {
881+
threadLocalGmtCalendar.remove()
882+
threadLocalLocalTimeZone.remove()
883+
threadLocalTimestampFormat.remove()
884+
threadLocalDateFormat.remove()
885+
}
841886
}

sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
3030
*
3131
* Please use the singleton [[DataTypes.DateType]].
3232
*
33-
* Internally, this is represented as the number of days from epoch (1970-01-01 00:00:00 UTC).
33+
* Internally, this is represented as the number of days from 1970-01-01.
3434
*/
3535
@DeveloperApi
3636
class DateType private() extends AtomicType {
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.util
19+
20+
import java.util.TimeZone
21+
22+
/**
23+
* Helper functions for testing date and time functionality.
24+
*/
25+
object DateTimeTestUtils {
26+
27+
val ALL_TIMEZONES: Seq[TimeZone] = TimeZone.getAvailableIDs.toSeq.map(TimeZone.getTimeZone)
28+
29+
def withDefaultTimeZone[T](newDefaultTimeZone: TimeZone)(block: => T): T = {
30+
val originalDefaultTimeZone = TimeZone.getDefault
31+
try {
32+
DateTimeUtils.resetThreadLocals()
33+
TimeZone.setDefault(newDefaultTimeZone)
34+
block
35+
} finally {
36+
TimeZone.setDefault(originalDefaultTimeZone)
37+
DateTimeUtils.resetThreadLocals()
38+
}
39+
}
40+
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,13 @@ class DateTimeUtilsSuite extends SparkFunSuite {
476476
test("2011-12-25 09:00:00.123456", "JST", "2011-12-25 18:00:00.123456")
477477
test("2011-12-25 09:00:00.123456", "PST", "2011-12-25 01:00:00.123456")
478478
test("2011-12-25 09:00:00.123456", "Asia/Shanghai", "2011-12-25 17:00:00.123456")
479+
480+
// Daylight Saving Time
481+
test("2016-03-13 09:59:59.0", "PST", "2016-03-13 01:59:59.0")
482+
test("2016-03-13 10:00:00.0", "PST", "2016-03-13 03:00:00.0")
483+
test("2016-11-06 08:59:59.0", "PST", "2016-11-06 01:59:59.0")
484+
test("2016-11-06 09:00:00.0", "PST", "2016-11-06 01:00:00.0")
485+
test("2016-11-06 10:00:00.0", "PST", "2016-11-06 02:00:00.0")
479486
}
480487

481488
test("to UTC timestamp") {
@@ -487,5 +494,38 @@ class DateTimeUtilsSuite extends SparkFunSuite {
487494
test("2011-12-25 18:00:00.123456", "JST", "2011-12-25 09:00:00.123456")
488495
test("2011-12-25 01:00:00.123456", "PST", "2011-12-25 09:00:00.123456")
489496
test("2011-12-25 17:00:00.123456", "Asia/Shanghai", "2011-12-25 09:00:00.123456")
497+
498+
// Daylight Saving Time
499+
test("2016-03-13 01:59:59", "PST", "2016-03-13 09:59:59.0")
500+
// 2016-03-13 02:00:00 PST does not exists
501+
test("2016-03-13 02:00:00", "PST", "2016-03-13 10:00:00.0")
502+
test("2016-03-13 03:00:00", "PST", "2016-03-13 10:00:00.0")
503+
test("2016-11-06 00:59:59", "PST", "2016-11-06 07:59:59.0")
504+
// 2016-11-06 01:00:00 PST could be 2016-11-06 08:00:00 UTC or 2016-11-06 09:00:00 UTC
505+
test("2016-11-06 01:00:00", "PST", "2016-11-06 09:00:00.0")
506+
test("2016-11-06 01:59:59", "PST", "2016-11-06 09:59:59.0")
507+
test("2016-11-06 02:00:00", "PST", "2016-11-06 10:00:00.0")
508+
}
509+
510+
test("daysToMillis and millisToDays") {
511+
// There are some days are skipped entirely in some timezone, skip them here.
512+
val skipped_days = Map[String, Int](
513+
"Kwajalein" -> 8632,
514+
"Pacific/Apia" -> 15338,
515+
"Pacific/Enderbury" -> 9131,
516+
"Pacific/Fakaofo" -> 15338,
517+
"Pacific/Kiritimati" -> 9131,
518+
"Pacific/Kwajalein" -> 8632,
519+
"MIT" -> 15338)
520+
for (tz <- DateTimeTestUtils.ALL_TIMEZONES) {
521+
DateTimeTestUtils.withDefaultTimeZone(tz) {
522+
val skipped = skipped_days.getOrElse(tz.getID, Int.MinValue)
523+
(-20000 to 20000).foreach { d =>
524+
if (d != skipped) {
525+
assert(millisToDays(daysToMillis(d)) === d)
526+
}
527+
}
528+
}
529+
}
490530
}
491531
}

0 commit comments

Comments
 (0)