Skip to content

Commit 67f2030

Browse files
ToruitasPhotonios
authored and committed
Add in Hourly Partition Support
1 parent 182ff72 commit 67f2030

File tree

7 files changed

+136
-10
lines changed

7 files changed

+136
-10
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,6 @@ build/
2929

3030
# Ignore PyCharm / IntelliJ files
3131
.idea/
32+
build/
33+
.python-version
34+
docker-compose.yml

docs/source/table_partitioning.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,17 @@ Time-based partitioning
292292
count=12,
293293
),
294294
),
295+
296+
# 24 partitions ahead, each partition is 1 hour, for a total of 24 hours, starting with hour 0 of the current day
297+
# old partitions are never deleted, `max_age` is not set
298+
# partitions will be named `[table_name]_[year]_[month]_[month day number]_[hour (24h)]:00:00`.
299+
PostgresPartitioningConfig(
300+
model=MyPartitionedModel,
301+
strategy=PostgresCurrentTimePartitioningStrategy(
302+
size=PostgresTimePartitionSize(hours=1),
303+
count=24,
304+
),
305+
),
295306
])
296307
297308

psqlextra/partitioning/current_time_strategy.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ class PostgresCurrentTimePartitioningStrategy(
1616
1717
All buckets will be equal in size and start at the start of the
1818
unit. With monthly partitioning, partitions start on the 1st and
19-
with weekly partitioning, partitions start on monday.
19+
with weekly partitioning, partitions start on Monday, and with hourly
20+
partitioning, partitions start at 00:00.
2021
"""
2122

2223
def __init__(

psqlextra/partitioning/shorthands.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ def partition_by_current_time(
1616
months: Optional[int] = None,
1717
weeks: Optional[int] = None,
1818
days: Optional[int] = None,
19+
hours: Optional[int] = None,
1920
max_age: Optional[relativedelta] = None,
2021
name_format: Optional[str] = None,
2122
) -> PostgresPartitioningConfig:
@@ -43,6 +44,9 @@ def partition_by_current_time(
4344
days:
4445
The number of days each partition should contain.
4546
47+
hours:
48+
The number of hours each partition should contain.
49+
4650
max_age:
4751
The maximum age of a partition (calculated from the
4852
start of the partition).
@@ -56,7 +60,7 @@ def partition_by_current_time(
5660
"""
5761

5862
size = PostgresTimePartitionSize(
59-
years=years, months=months, weeks=weeks, days=days
63+
years=years, months=months, weeks=weeks, days=days, hours=hours
6064
)
6165

6266
return PostgresPartitioningConfig(

psqlextra/partitioning/time_partition.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class PostgresTimePartition(PostgresRangePartition):
2020
PostgresTimePartitionUnit.MONTHS: "%Y_%b",
2121
PostgresTimePartitionUnit.WEEKS: "%Y_week_%W",
2222
PostgresTimePartitionUnit.DAYS: "%Y_%b_%d",
23+
PostgresTimePartitionUnit.HOURS: "%Y_%b_%d_%H:00:00",
2324
}
2425

2526
def __init__(
@@ -31,8 +32,8 @@ def __init__(
3132
end_datetime = start_datetime + size.as_delta()
3233

3334
super().__init__(
34-
from_values=start_datetime.strftime("%Y-%m-%d"),
35-
to_values=end_datetime.strftime("%Y-%m-%d"),
35+
from_values=start_datetime.strftime("%Y-%m-%d %H:00:00"),
36+
to_values=end_datetime.strftime("%Y-%m-%d %H:00:00"),
3637
)
3738

3839
self.size = size

psqlextra/partitioning/time_partition_size.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class PostgresTimePartitionUnit(enum.Enum):
1313
MONTHS = "months"
1414
WEEKS = "weeks"
1515
DAYS = "days"
16+
HOURS = "hours"
1617

1718

1819
UNIX_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
@@ -31,9 +32,10 @@ def __init__(
3132
months: Optional[int] = None,
3233
weeks: Optional[int] = None,
3334
days: Optional[int] = None,
35+
hours: Optional[int] = None,
3436
anchor: datetime = UNIX_EPOCH,
3537
) -> None:
36-
sizes = [years, months, weeks, days]
38+
sizes = [years, months, weeks, days, hours]
3739

3840
if not any(sizes):
3941
raise PostgresPartitioningError("Partition cannot be 0 in size.")
@@ -56,6 +58,9 @@ def __init__(
5658
elif days:
5759
self.unit = PostgresTimePartitionUnit.DAYS
5860
self.value = days
61+
elif hours:
62+
self.unit = PostgresTimePartitionUnit.HOURS
63+
self.value = hours
5964
else:
6065
raise PostgresPartitioningError(
6166
"Unsupported time partitioning unit"
@@ -74,6 +79,9 @@ def as_delta(self) -> relativedelta:
7479
if self.unit == PostgresTimePartitionUnit.DAYS:
7580
return relativedelta(days=self.value)
7681

82+
if self.unit == PostgresTimePartitionUnit.HOURS:
83+
return relativedelta(hours=self.value)
84+
7785
raise PostgresPartitioningError(
7886
"Unsupported time partitioning unit: %s" % self.unit
7987
)
@@ -88,14 +96,21 @@ def start(self, dt: datetime) -> datetime:
8896
if self.unit == PostgresTimePartitionUnit.WEEKS:
8997
return self._ensure_datetime(dt - relativedelta(days=dt.weekday()))
9098

91-
diff_days = (dt - self.anchor).days
92-
partition_index = diff_days // self.value
93-
start = self.anchor + timedelta(days=partition_index * self.value)
94-
return self._ensure_datetime(start)
99+
if self.unit == PostgresTimePartitionUnit.DAYS:
100+
diff_days = (dt - self.anchor).days
101+
partition_index = diff_days // self.value
102+
start = self.anchor + timedelta(days=partition_index * self.value)
103+
return self._ensure_datetime(start)
104+
105+
if self.unit == PostgresTimePartitionUnit.HOURS:
106+
return self._ensure_datetime(dt.replace(hour=0))
107+
108+
raise ValueError("Unknown unit")
95109

96110
@staticmethod
97111
def _ensure_datetime(dt: Union[date, datetime]) -> datetime:
98-
return datetime(year=dt.year, month=dt.month, day=dt.day)
112+
hour = dt.hour if isinstance(dt, datetime) else 0
113+
return datetime(year=dt.year, month=dt.month, day=dt.day, hour=hour)
99114

100115
def __repr__(self) -> str:
101116
return "PostgresTimePartitionSize<%s, %s>" % (self.unit, self.value)

tests/test_partitioning_time.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,56 @@ def test_partitioning_time_daily_apply():
254254
assert table.partitions[6].name == "2019_jun_04"
255255

256256

257+
@pytest.mark.postgres_version(lt=110000)
258+
def test_partitioning_time_hourly_apply():
259+
"""Tests whether automatically creating new partitions ahead hourly works
260+
as expected."""
261+
262+
model = define_fake_partitioned_model(
263+
{"timestamp": models.DateTimeField()}, {"key": ["timestamp"]}
264+
)
265+
266+
schema_editor = connection.schema_editor()
267+
schema_editor.create_partitioned_model(model)
268+
269+
# create partitions for the next 4 hours (including the current)
270+
with freezegun.freeze_time("2019-1-23"):
271+
manager = PostgresPartitioningManager(
272+
[partition_by_current_time(model, hours=1, count=4)]
273+
)
274+
manager.plan().apply()
275+
276+
table = _get_partitioned_table(model)
277+
assert len(table.partitions) == 4
278+
assert table.partitions[0].name == "2019_jan_23_00:00:00"
279+
assert table.partitions[1].name == "2019_jan_23_01:00:00"
280+
assert table.partitions[2].name == "2019_jan_23_02:00:00"
281+
assert table.partitions[3].name == "2019_jan_23_03:00:00"
282+
283+
# re-running it with 5, should just create one additional partition
284+
with freezegun.freeze_time("2019-1-23"):
285+
manager = PostgresPartitioningManager(
286+
[partition_by_current_time(model, hours=1, count=5)]
287+
)
288+
manager.plan().apply()
289+
290+
table = _get_partitioned_table(model)
291+
assert len(table.partitions) == 5
292+
assert table.partitions[4].name == "2019_jan_23_04:00:00"
293+
294+
# it's june now, we want to partition two hours ahead
295+
with freezegun.freeze_time("2019-06-03"):
296+
manager = PostgresPartitioningManager(
297+
[partition_by_current_time(model, hours=1, count=2)]
298+
)
299+
manager.plan().apply()
300+
301+
table = _get_partitioned_table(model)
302+
assert len(table.partitions) == 7
303+
assert table.partitions[5].name == "2019_jun_03_00:00:00"
304+
assert table.partitions[6].name == "2019_jun_03_01:00:00"
305+
306+
257307
@pytest.mark.postgres_version(lt=110000)
258308
def test_partitioning_time_consistent_daily_apply():
259309
"""Ensures that automatic daily partition creation is consistent and
@@ -415,11 +465,52 @@ def test_partitioning_time_daily_apply_insert():
415465
model.objects.create(timestamp=datetime.date(2019, 1, 10))
416466

417467

468+
@pytest.mark.postgres_version(lt=110000)
469+
def test_partitioning_time_hourly_apply_insert():
470+
"""Tests whether automatically created hourly partitions line up
471+
perfectly."""
472+
473+
model = define_fake_partitioned_model(
474+
{"timestamp": models.DateTimeField()}, {"key": ["timestamp"]}
475+
)
476+
477+
schema_editor = connection.schema_editor()
478+
schema_editor.create_partitioned_model(model)
479+
480+
# that's a monday
481+
with freezegun.freeze_time("2019-1-07"):
482+
manager = PostgresPartitioningManager(
483+
[partition_by_current_time(model, hours=1, count=2)]
484+
)
485+
manager.plan().apply()
486+
487+
table = _get_partitioned_table(model)
488+
assert len(table.partitions) == 2
489+
490+
model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 0))
491+
model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 1))
492+
493+
with transaction.atomic():
494+
with pytest.raises(IntegrityError):
495+
model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 2))
496+
model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 3))
497+
498+
with freezegun.freeze_time("2019-1-07"):
499+
manager = PostgresPartitioningManager(
500+
[partition_by_current_time(model, hours=1, count=4)]
501+
)
502+
manager.plan().apply()
503+
504+
model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 2))
505+
model.objects.create(timestamp=datetime.datetime(2019, 1, 7, 3))
506+
507+
418508
@pytest.mark.postgres_version(lt=110000)
419509
@pytest.mark.parametrize(
420510
"kwargs,partition_names",
421511
[
422512
(dict(days=2), ["2018_dec_31", "2019_jan_02"]),
513+
(dict(hours=2), ["2019_jan_01_00:00:00", "2019_jan_01_02:00:00"]),
423514
(dict(weeks=2), ["2018_week_53", "2019_week_02"]),
424515
(dict(months=2), ["2019_jan", "2019_mar"]),
425516
(dict(years=2), ["2019", "2021"]),

0 commit comments

Comments
 (0)