Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions blog/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from django.db.models.functions import Length
from django.db.models import F
from django import forms
from xml.etree import ElementTree
from .models import (
Entry,
Tag,
Expand Down Expand Up @@ -44,10 +43,13 @@ class MyEntryForm(forms.ModelForm):
def clean_body(self):
    """Validate the ``body`` field and return it unchanged.

    Bodies of entries that are not flagged ``use_markdown`` must parse as XML
    once wrapped in an ``<entry>`` element. Markdown bodies skip that check.

    Raises:
        forms.ValidationError: if a non-Markdown body fails to parse; the
            message is the parser's error text.
    """
    body = self.cleaned_data["body"]
    if self.cleaned_data.get("use_markdown"):
        # Markdown source is not expected to be well-formed XML.
        return body

    # Local import so this method does not depend on a module-level import.
    from xml.etree import ElementTree

    try:
        ElementTree.fromstring("<entry>%s</entry>" % body)
    except Exception as e:
        # Any parser failure is reported to the user as a field error.
        raise forms.ValidationError(str(e)) from e
    return body


Expand Down
3 changes: 2 additions & 1 deletion blog/feeds.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from django.contrib.syndication.views import Feed
from django.utils.dateformat import format as date_format
from django.utils.feedgenerator import Atom1Feed
from django.utils.safestring import mark_safe
from django.http import HttpResponse
from blog.models import Entry, Blogmark, Quotation, Note

Expand Down Expand Up @@ -63,7 +64,7 @@ def item_description(self, item):
'Subscribe to <a href="https://simonwillison.net/atom/everything/">/atom/everything/</a> '
'to get all of my posts, or take a look at my <a href="https://simonwillison.net/about/#subscribe">other subscription options</a>.</em></p>'
)
return item.body + note
return mark_safe(str(item.rendered) + note)


class Blogmarks(Base):
Expand Down
5 changes: 4 additions & 1 deletion blog/management/commands/validate_entries_xml.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from django.core.management.base import BaseCommand
from xml.etree import ElementTree
from blog.models import Entry


Expand All @@ -8,7 +7,11 @@ class Command(BaseCommand):

def handle(self, *args, **kwargs):
for entry in Entry.objects.all():
if entry.use_markdown:
continue
try:
from xml.etree import ElementTree

ElementTree.fromstring("<entry>%s</entry>" % entry.body.encode("utf8"))
except Exception as e:
print(e)
Expand Down
21 changes: 21 additions & 0 deletions blog/migrations/0030_entry_use_markdown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generated by Django 5.1.4 on 2025-10-04 19:07

from django.db import migrations, models


# Adds the boolean "use_markdown" field to the "entry" model of the blog app.
class Migration(migrations.Migration):

# Applied after blog migration 0029_blogmark_title.
dependencies = [
("blog", "0029_blogmark_title"),
]

# Single operation: add the new field with a default of False.
operations = [
migrations.AddField(
model_name="entry",
name="use_markdown",
field=models.BooleanField(
default=False,
# NOTE(review): this help text describes image markup, not what the flag toggles - looks copied from another field; confirm.
help_text='Images can use the img element - set width="..." for a specific width and use class="blogmark-image" to center and add a drop shadow.',
),
),
]
22 changes: 17 additions & 5 deletions blog/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import arrow
import datetime
from markdown import markdown
from xml.etree import ElementTree
from bs4 import BeautifulSoup

tag_re = re.compile("^[a-z0-9]+$")

Expand Down Expand Up @@ -224,6 +224,10 @@ class Meta:
class Entry(BaseModel):
title = models.CharField(max_length=255)
body = models.TextField()
use_markdown = models.BooleanField(
default=False,
help_text='Images can use the img element - set width="..." for a specific width and use class="blogmark-image" to center and add a drop shadow.',
)
tweet_html = models.TextField(
blank=True,
null=True,
Expand Down Expand Up @@ -251,13 +255,13 @@ def previous_by_created(self):

def images(self):
    """Extract the images from the rendered entry.

    Returns:
        list[dict]: one attribute dict per ``<img>`` element, in document
        order, with every attribute value as a plain string.
    """
    # multi_valued_attributes=None stops BeautifulSoup from turning "class"
    # (and similar attributes) into lists, so each dict maps str -> str.
    soup = BeautifulSoup(
        str(self.rendered), "html.parser", multi_valued_attributes=None
    )
    return [dict(img.attrs) for img in soup.find_all("img")]

def index_components(self):
    """Return the text used to index this entry, keyed "A", "C" and "B".

    "A" is the title, "C" is the rendered body with tags stripped, and "B" is
    the entry's tag names joined by spaces.
    """
    return {
        "A": self.title,
        # Use the rendered HTML so Markdown entries are indexed by their
        # output rather than their source.
        "C": strip_tags(str(self.rendered)),
        "B": " ".join(self.tags.values_list("tag", flat=True)),
    }

Expand Down Expand Up @@ -286,14 +290,22 @@ def series_info(self):
}

def multi_paragraph(self):
    """Return True when the rendered entry contains more than one ``<p``.

    The check runs against ``self.rendered`` so that Markdown entries are
    judged by their HTML output, not their source text.
    """
    return str(self.rendered).count("<p") > 1

# The string form of an entry is its title.
def __str__(self):
return self.title

# Extends BaseModel.Meta; the only option set here is the plural display name.
class Meta(BaseModel.Meta):
verbose_name_plural = "Entries"

@property
def rendered(self):
    """Return the entry body as markup that is marked safe.

    When ``use_markdown`` is set the body is passed through ``markdown()``;
    otherwise it is used as-is. A missing body is treated as an empty string.
    """
    source = self.body or ""
    html = markdown(source) if self.use_markdown else source
    return mark_safe(html)


class LiveUpdate(models.Model):
created = models.DateTimeField(auto_now_add=True)
Expand Down
95 changes: 56 additions & 39 deletions blog/templatetags/entry_tags.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
from django import template
from django.utils.html import conditional_escape
from django.utils.safestring import mark_safe
from xml.etree import ElementTree
from bs4 import BeautifulSoup, NavigableString, Comment
import re
import datetime

register = template.Library()
entry_stripper = re.compile("^<entry>(.*?)</entry>$", re.DOTALL)


# Template filter: wrap the value in an XhtmlString, passing contains_markup=True.
@register.filter
def xhtml(xhtml):
return XhtmlString(xhtml, contains_markup=True)
Expand All @@ -17,32 +14,38 @@ def xhtml(xhtml):
class XhtmlString(object):
    """Markup parsed with BeautifulSoup, used to pass a tree between filters.

    Attributes:
        soup: the BeautifulSoup document, whose content is wrapped in a
            synthetic ``<entry>`` element.
        et: the ``<entry>`` element inside ``soup`` (``None`` if not found).
    """

    def __init__(self, value, contains_markup=False):
        """Parse ``value``, or share the tree of an existing XhtmlString.

        Args:
            value: an XhtmlString, a string of markup or text, or None
                (treated as an empty string).
            contains_markup: when False, ``value`` is passed through
                ``conditional_escape`` before parsing.
        """
        if isinstance(value, XhtmlString):
            # Reuse the already-parsed tree; edits are visible to both objects.
            self.soup = value.soup
        else:
            if value is None:
                value = ""
            if not contains_markup:
                # Handle strings like "this & that"
                value = conditional_escape(value)
            self.soup = BeautifulSoup(f"<entry>{value}</entry>", "html.parser")
        # Derived from the soup in both branches so ``et`` is always set.
        self.et = self.soup.find("entry")

    def __str__(self):
        """Return the markup inside ``<entry>``, marked safe ("" if absent)."""
        if self.et is None:
            return ""
        return mark_safe("".join(str(content) for content in self.et.contents))


@register.filter
def resize_images_to_fit_width(value, arg):
    """Cap the ``width`` attribute of every ``<img>`` at ``arg`` pixels.

    Images wider than the limit get ``width`` set to the limit. When a
    numeric ``height`` is present it is scaled by the same ratio; when it is
    absent only ``width`` is rewritten. Images whose width or height is not
    an integer are left untouched.

    Args:
        value: markup, or an XhtmlString, to process.
        arg: the maximum width; converted with ``int()``.

    Returns:
        The XhtmlString wrapping the (possibly modified) tree.
    """
    max_width = int(arg)
    x = XhtmlString(value)
    if x.et is None:
        return x
    for img in x.et.find_all("img"):
        try:
            width = int(img.get("width", 0))
            height = int(img.get("height", 0))
        except (TypeError, ValueError):
            # Non-integer dimensions such as "100%" cannot be scaled.
            continue
        if width <= max_width:
            continue
        # Scale down
        img["width"] = str(max_width)
        if height:
            img["height"] = str(int(float(max_width) / width * height))
    return x


Expand All @@ -64,23 +67,27 @@ def split_cutoff(xhtml):
@register.filter
def remove_context_paragraph(xhtml):
    """Drop the first top-level paragraph when it is a "context" paragraph.

    The paragraph is removed when its serialized form starts with
    ``<p><em>My answer to`` or ``<p class="context">``.

    Returns:
        The XhtmlString wrapping the (possibly modified) tree.
    """
    x = XhtmlString(xhtml)
    if x.et is None:
        return x
    # recursive=False looks only at direct children of <entry>, which is what
    # ElementTree's find("p") did before the move to BeautifulSoup.
    p = x.et.find("p", recursive=False)
    if p is None:
        return x
    markup = str(p)
    if markup.startswith(("<p><em>My answer to", '<p class="context">')):
        p.decompose()
    return x


@register.filter
def first_paragraph(xhtml):
    """Return the first top-level ``<p>`` of the markup, marked safe.

    When there is no such paragraph, the input itself is wrapped in
    ``<p>...</p>`` and returned instead.
    """
    x = XhtmlString(xhtml)
    # recursive=False looks only at direct children of <entry>, so a
    # paragraph nested in e.g. a leading blockquote is not picked up.
    p = x.et.find("p", recursive=False) if x.et is not None else None
    if p is None:
        return mark_safe("<p>%s</p>" % xhtml)
    return mark_safe(str(p))

Expand All @@ -104,9 +111,11 @@ def ends_with_punctuation(value):
@register.filter
def strip_p_ids(xhtml):
    """Remove the ``id`` attribute from every ``<p>`` at any depth.

    Returns:
        The XhtmlString wrapping the (possibly modified) tree.
    """
    x = XhtmlString(xhtml)
    if x.et is None:
        return x
    for p in x.et.find_all("p"):
        # pop() with a default is a no-op for paragraphs without an id.
        p.attrs.pop("id", None)
    return x


Expand All @@ -120,18 +129,26 @@ def break_up_long_words(xhtml, length):


def do_break_long_words(et, length):
    """Pass a BeautifulSoup Tag instance; breaks up long words in it.

    Every text node under ``et`` is run through
    ``do_break_long_words_string`` and replaced in place when the result
    differs. ``None`` is accepted and ignored.
    """
    if et is None:
        return
    # Snapshot the nodes first: replace_with() mutates the tree being walked.
    for node in list(_iter_text_nodes(et)):
        original = str(node)
        broken = do_break_long_words_string(original, length)
        if broken != original:
            node.replace_with(broken)


whitespace_re = re.compile(r"(\s+)")


def _iter_text_nodes(tag):
if tag is None:
return []
for node in tag.descendants:
if isinstance(node, NavigableString) and not isinstance(node, Comment):
yield node


def do_break_long_words_string(s, length):
bits = whitespace_re.split(s)
for i, bit in enumerate(bits):
Expand Down Expand Up @@ -167,15 +184,15 @@ def strip_wrapping_p(xhtml):


def do_typography(et):
    """Apply ``do_typography_string`` to the text nodes under ``et``.

    Text inside a ``<pre>`` or ``<code>`` element is left alone, at any
    nesting depth (for example inside a ``<span>`` within a ``<pre>``).
    Nodes are replaced in place only when the text actually changes.
    ``None`` is accepted and ignored.
    """
    if et is None:
        return
    # Snapshot the nodes first: replace_with() mutates the tree being walked.
    for node in list(_iter_text_nodes(et)):
        # Check every ancestor, not just the immediate parent.
        if any(parent.name in ("pre", "code") for parent in node.parents):
            continue
        original = str(node)
        styled = do_typography_string(original)
        if styled != original:
            node.replace_with(styled)


LEFT_DOUBLE_QUOTATION_MARK = "\u201c"
Expand Down
43 changes: 43 additions & 0 deletions blog/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,49 @@ def test_markup(self):
""".strip(),
)

def test_markdown_entry_renders_everywhere(self):
    """A Markdown entry is served as rendered HTML on pages and in feeds.

    Each path runs in its own subTest so that one failing page does not hide
    failures on the others.
    """
    tag = Tag.objects.create(tag="markdown-test")
    entry = EntryFactory(
        title="Markdown Entry",
        body="This is **markdown** text.",
        use_markdown=True,
    )
    entry.tags.add(tag)

    year = entry.created.year
    month = entry.created.strftime("%b")
    day = entry.created.day

    # Maps each path to whether its response must contain the rendered body;
    # paths mapped to False are only checked for a 200 status.
    paths = {
        "/": True,
        f"/{year}/": False,
        f"/{year}/{month}/": True,
        f"/{year}/{month}/{day}/": True,
        entry.get_absolute_url(): True,
        f"/tags/{tag.tag}/": True,
        f"/search/?tag={tag.tag}": True,
    }

    for path, should_have_markup in paths.items():
        with self.subTest(path=path):
            response = self.client.get(path)
            self.assertEqual(response.status_code, 200, msg=path)
            if should_have_markup:
                self.assertIn(
                    "<strong>markdown</strong>",
                    response.content.decode(),
                    msg=f"{path} missing rendered markdown",
                )

    # The feeds are expected to carry the rendered markup entity-escaped.
    for feed_path in ("/atom/entries/", "/atom/everything/"):
        with self.subTest(feed=feed_path):
            feed = self.client.get(feed_path)
            self.assertEqual(feed.status_code, 200)
            self.assertIn(
                "&lt;strong&gt;markdown&lt;/strong&gt;",
                feed.content.decode(),
            )

def test_update_blogmark_runs_commit_hooks(self):
# This was throwing errors on upgrade Django 2.2 to 2.2.1
blogmark = BlogmarkFactory()
Expand Down
4 changes: 2 additions & 2 deletions templates/entry.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
{% endblock %}

{% block card_title %}{{ entry.title|typography }}{% endblock %}
{% block card_description %}{{ entry.body|xhtml|remove_context_paragraph|typography|xhtml2html|striptags|truncatewords:30|force_escape }}{% endblock %}
{% block card_description %}{{ entry.rendered|xhtml|remove_context_paragraph|typography|xhtml2html|striptags|truncatewords:30|force_escape }}{% endblock %}

{% block item_content %}
<div data-permalink-context="{{ entry.get_absolute_url }}">
Expand All @@ -18,7 +18,7 @@ <h2>{{ entry.title|typography }}</h2>

{% include "_draft_warning.html" %}

{{ entry.body|xhtml|resize_images_to_fit_width:"450"|typography|xhtml2html }}
{{ entry.rendered|xhtml|resize_images_to_fit_width:"450"|typography|xhtml2html }}

{% if updates %}
<div id="live-updates">
Expand Down
2 changes: 1 addition & 1 deletion templates/feeds/everything.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% if obj.is_entry %}
{{ obj.body|safe }}
{{ obj.rendered|safe }}
{% if obj.tags.count %}
<p>Tags: {% for tag in obj.tags.all %}<a href="https://simonwillison.net/tags/{{ tag.tag }}">{{ tag.tag }}</a>{% if not forloop.last %}, {% endif %}{% endfor %}</p>
{% endif %}
Expand Down
Loading