123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625 |
- #!/usr/bin/env python3
- import json
- import locale
- from collections import defaultdict
- from dataclasses import dataclass
- from datetime import datetime, timedelta
- from html import escape
- from itertools import groupby
- from pathlib import Path
- from string import Template
- from textwrap import dedent
- from time import perf_counter
-
- import mistune
- from jinja2 import Environment as Env
- from jinja2 import FileSystemLoader
- from jinja2.filters import do_striptags
- from minicli import cli, run, wrap
- from mistune.directives import DirectiveInclude
- from mistune.plugins import plugin_strikethrough
- from PIL import Image
- from slugify import slugify
-
- from typography import typographie
- from widont import widont
-
# Switch every locale category to French at import time. Dates rendered
# through `strftime` (e.g. the "%B %Y" month headings built in `pages()`)
# and within the Jinja2 templates then come out in French.
locale.setlocale(locale.LC_ALL, "fr_FR.UTF-8")
-
# Filesystem layout, expressed relative to the current working directory.
HERE = Path(".")
DAVID = HERE.joinpath("david")
STATIC = HERE.joinpath("..", "larlet-fr-static")
SOURCES_PATH = DAVID.joinpath("2023", "_sources")

# Public origin and local (development) origin used to build absolute URLs.
DOMAIN = "https://larlet.fr"
LOCAL_DOMAIN = "http://larlet.test:3579"

# Hardcoding publication at 12 in Paris timezone.
# NOTE(review): the "+01:00" offset is fixed, so it does not follow daylight
# saving time.
NORMALIZED_STRFTIME = "%Y-%m-%dT12:00:00+01:00"
NB_ITEMS_IN_FEED = 30

# "Now" shifted by six hours; pages dated after `PUBLICATION_BUFFER` are
# considered drafts (see `Page.is_draft`).
TODAY = datetime.today() + timedelta(hours=6)
PUBLICATION_BUFFER = TODAY - timedelta(days=0)

# Registries filled as a side effect of `Page.all()`.
all_tags = set()
pages_by_tags = defaultdict(list)
pages_by_url = {}
-
-
class MarkParser(mistune.InlineParser):
    """Inline parser turning `==foo==` into `<mark>foo</mark>`."""

    # Pattern of the `mark` rule: an opening `==` delimiter (group 1), the
    # marked content (group 2), then the same delimiter again.
    MARK = (
        r"(\={2})(?=[^\s*])("
        r"(?:\\[\\*]|[^*])*"
        r"(?:" + mistune.InlineParser.ESCAPE + r"|[^\s*]))\1"
    )

    # Register the extra rule next to the stock inline rules.
    RULE_NAMES = (*mistune.InlineParser.RULE_NAMES, "mark")

    def parse_mark(self, m, state):
        """Return the `mark` token for match `m`, rendering its inner text."""
        inner = m.group(2)
        return "mark", self.render(inner, state)
-
-
class MarkRenderer(mistune.HTMLRenderer):
    """HTML counterpart of `MarkParser`: renders its `mark` tokens."""

    def mark(self, text):
        """Wrap the already rendered `text` in a `<mark>` element."""
        return f"<mark>{text}</mark>"
-
-
class TagsRenderer(mistune.HTMLRenderer):
    """Make the assumption that each paragraph starting with a `#` lists tags."""

    def paragraph(self, text):
        """Render a `#`-prefixed paragraph as a `<nav>` of tag links.

        Any other paragraph is delegated to the parent renderer.
        """
        if not text.startswith("#"):
            return super().paragraph(text)

        # Splitting on `#` leaves empty chunks (e.g. before the first tag),
        # which are filtered out once stripped.
        names = (chunk.strip() for chunk in text.split("#"))
        links = [
            f'<a href="/david/2023/{slugify(name)}/">#{name}</a>'
            for name in names
            if name
        ]
        tags = " ".join(links)
        return f"<nav><p>{tags}</p></nav>\n"
-
-
class FrenchTypographyRenderer(mistune.HTMLRenderer):
    """Post-process rendered text with the French typographic rules."""

    def text(self, text):
        """Render inline text, then pass it through `typographie`."""
        rendered = super().text(text)
        return typographie(rendered, html=True)

    def block_html(self, html):
        """Render a raw HTML block, then pass it through `typographie`."""
        rendered = super().block_html(html)
        return typographie(rendered, html=True)
-
-
class InternalLinkTitleRenderer(mistune.HTMLRenderer):
    """Automatically generate the title for internal links."""

    def link(self, link, text=None, title=None):
        """Render an `<a>` element, guessing the title of internal links.

        When no `title` is given and `link` targets `/david/2023/`, the title
        of the matching page registered in `pages_by_url` is used, if any.
        """
        # A missing or empty label falls back to the URL itself.
        label = text or link

        if not title and link.startswith("/david/2023/"):
            # It will not work for internal links referencing the future.
            known_page = pages_by_url.get(link)
            if known_page:
                title = known_page.title

        attributes = ['href="' + self._safe_url(link) + '"']
        if title:
            attributes.append('title="' + mistune.escape_html(title) + '"')
        return "<a " + " ".join(attributes) + ">" + label + "</a>"
-
-
class CustomAndBlockquoteLanguageRenderer(
    FrenchTypographyRenderer, InternalLinkTitleRenderer, MarkRenderer, TagsRenderer
):
    """Combine the custom renderers and flag `[en]` blockquotes as English."""

    def _get_language(self, text):
        """Return `(language, text)` where `language` is "en" or `None`.

        The `[en] ` marker is removed from the returned text; note that
        `str.replace` strips it from every paragraph opening with it, not
        only the first one.
        """
        marker = "<p>[en] "
        if not text.startswith(marker):
            return None, text
        return "en", text.replace(marker, "<p>")

    def block_quote(self, text):
        """Render a blockquote, with a `lang` attribute when one is detected."""
        language, text = self._get_language(text)
        lang_attribute = f' lang="{language}"' if language else ""
        return f"\n<blockquote{lang_attribute}>\n{text}</blockquote>\n"
-
-
class ImgsWithSizesRenderer(CustomAndBlockquoteLanguageRenderer):
    """Renders images as <figure>s and add sizes."""

    def paragraph(self, text):
        """Render a paragraph, leaving `<figure>` markup unwrapped."""
        # In case of a figure, we do not want the (non-standard) paragraph.
        if text.strip().startswith("<figure>"):
            return text
        return super().paragraph(text)

    def _generate_size(self, src, width, height):
        """Return the URL of the `width`x`height` variant of `src`.

        The variant is created next to the original under `STATIC` when it
        does not exist yet. Sources outside of `/2023/` are never generated:
        their variant URL is returned as is.
        """
        src_size = src.replace(".jpg", f"_{width}x{height}.jpg")
        # `src` is an absolute URL path: drop the leading slash to resolve it
        # against the static folder.
        full_path = STATIC / Path(src[1:])
        full_path_size = STATIC / Path(src_size[1:])
        if full_path_size.exists() or "/2023/" not in src:
            return src_size

        # The context manager releases the file handle once the variant is
        # saved (it used to stay open).
        with Image.open(full_path) as image:
            # NOTE(review): `thumbnail` keeps the aspect ratio, so the result
            # fits within (width, height) rather than matching it exactly.
            image.thumbnail((width, height), resample=Image.LANCZOS)
            image.save(full_path_size, icc_profile=image.info.get("icc_profile"))
        return src_size

    def _generate_webp(self, src):
        """Return the URL of the WebP version of `src`, creating it if needed.

        Currently unused by `image()` (WebP sources are disabled there). A
        previous approach shelled out to `cwebp -q 80 … -metadata icc`.
        """
        src_webp = src.replace(".jpg", ".webp")
        full_path = STATIC / Path(src[1:])
        full_path_webp = STATIC / Path(src_webp[1:])
        if full_path_webp.exists() or "/2023/" not in src:
            return src_webp

        with Image.open(full_path) as image:
            image.save(
                full_path_webp, format="webp", icc_profile=image.info.get("icc_profile")
            )
        return src_webp

    def image(self, src, alt="", title=None):
        """Render `src` as a `<figure>` with a responsive `srcset`.

        The original image plus three downscaled variants are advertised.
        `title` becomes the caption; without a title the caption is left
        empty (it used to contain the literal text "None").
        """
        SIZES = [(660, 440), (990, 660), (1320, 880)]
        full_path = STATIC / Path(src[1:])
        # Only the dimensions are needed: close the file right away.
        with Image.open(full_path) as image:
            width, height = image.size

        jpg_srcs = [(src, width, height)]
        for size_width, size_height in SIZES:
            src_size = self._generate_size(src, size_width, size_height)
            jpg_srcs.append((src_size, size_width, size_height))

        jpg_srcsets = ", ".join(
            f"{jpg_src} {jpg_width}w" for jpg_src, jpg_width, _ in jpg_srcs
        )
        caption = "" if title is None else title
        return dedent(
            f"""\
            <figure>
                <a href="{src}"
                    title="Cliquer pour une version haute résolution">
                    <img
                        src="{src}"
                        width="{width}" height="{height}"
                        srcset="{jpg_srcsets}"
                        sizes="min(100vw, calc(100vh * {width} / {height}))"
                        loading="lazy"
                        decoding="async"
                        alt="{alt}">
                </a>
                <figcaption>{caption}</figcaption>
            </figure>
            """
        )
-
-
class H2AnchorsRenderer(ImgsWithSizesRenderer):
    """Renderer adding an `id` and a self-link to each H2 title."""

    def heading(self, text, level):
        """Render a heading; only level 2 gets the anchor treatment."""
        if level != 2:
            return super().heading(text, level)

        anchor = slugify(text)
        pieces = [
            f'<h2 id="{anchor}">',
            f"{text} ",
            f'<a href="#{anchor}" title="Ancre vers cette partie">#</a>',
            "</h2>",
        ]
        return "".join(pieces)
-
-
def _build_markdown(renderer_class):
    """Assemble a mistune parser around fresh `renderer_class` instances."""
    return mistune.Markdown(
        renderer=renderer_class(escape=False),
        inline=MarkParser(renderer_class(escape=False)),
        plugins=[DirectiveInclude(), plugin_strikethrough],
    )


# We want a custom renderer to create a hash/link for each H2 heading.
markdown_with_h2_anchors = _build_markdown(H2AnchorsRenderer)
# The second markdown is pertinent to generate articles for the feed,
# we do not need anchors in that case.
markdown_with_img_sizes = _build_markdown(ImgsWithSizesRenderer)

# This is the jinja2 configuration to locate templates.
environment = Env(loader=FileSystemLoader(str(DAVID.joinpath("templates"))))
-
-
def neighborhood(iterable, first=None, last=None):
    """
    Yield the (previous, current, next) items given an iterable.

    You can specify a `first` and/or `last` item for bounds: `first` is used
    as the `previous` of the first item and `last` as the `next` of the last
    one. Nothing is yielded for an empty iterable.
    """
    iterator = iter(iterable)
    previous = first
    try:
        current = next(iterator)
    except StopIteration:
        # Empty input. Letting StopIteration escape from a generator is
        # turned into a RuntimeError (PEP 479), so end the generator cleanly.
        return
    for next_ in iterator:
        yield (previous, current, next_)
        previous = current
        current = next_
    yield (previous, current, last)
-
-
def each_file_from(source_dir, pattern="*", exclude=None):
    """Yield the sorted files of `source_dir` matching the glob `pattern`.

    Entries whose name is in `exclude` (when given) are skipped.
    """
    candidates = _each_path_from(source_dir, pattern=pattern, exclude=exclude)
    yield from (candidate for candidate in candidates if candidate.is_file())


def each_folder_from(source_dir, exclude=None):
    """Yield the sorted direct sub-folders of `source_dir`.

    Entries whose name is in `exclude` (when given) are skipped.
    """
    candidates = _each_path_from(source_dir, exclude=exclude)
    yield from (candidate for candidate in candidates if candidate.is_dir())


def _each_path_from(source_dir, pattern="*", exclude=None):
    """Yield the sorted paths globbed from `source_dir`, minus `exclude`."""
    matches = sorted(Path(source_dir).glob(pattern))
    for match in matches:
        if exclude is None or match.name not in exclude:
            yield match
-
-
@dataclass
class Page:
    """An article built from one rendered markdown source file.

    The dataclass fields hold the rendered title and content, the tag names
    and the source file name. URLs, dates, the feed-ready escaped markup and
    the search index entry are derived in `__post_init__`.
    """

    title: str
    content: str
    tags: list
    file_path: str
    lang: str = "fr"

    def __post_init__(self):
        """Derive URLs, dates, feed content and search data from the fields."""
        try:
            # Regular sources start with `YYYY-MM-DD - `.
            date_str, _ = self.file_path.split(" - ", 1)
        except ValueError:
            # Fallback for 2020 contents (search index)
            suffix = len(".md")
            prefix = len("YYYY/MM-DD") + suffix
            date_str = "2020-" + self.file_path[-prefix:-suffix]
        self.url = f"/david/{date_str.replace('-', '/')}/"
        self.date = datetime.strptime(date_str, "%Y-%m-%d").date()
        self.full_url = f"{DOMAIN}{self.url}"
        self.normalized_date = self.date.strftime(NORMALIZED_STRFTIME)
        self.escaped_title = escape(self.title)
        tag_template = Template(
            f'<a href="{DOMAIN}/david/2023/$tag_slug/">#$tag_name</a>'
        )
        tag_links = " ".join(
            tag_template.substitute(tag_slug=slugify(tag), tag_name=tag)
            for tag in self.tags
        )
        # Feed readers need absolute URLs: prefix root-relative links and
        # images with the domain, and in-page anchors with the page URL.
        self.escaped_content = escape(
            self.content.replace('href="/', f'href="{DOMAIN}/')
            .replace('src="/', f'src="{DOMAIN}/')
            .replace('href="#', f'href="{self.full_url}#')
            + f"<nav><p>{tag_links}</p></nav>"
            + '<hr/><p><a href="mailto:david@larlet.fr">Réagir ?</a></p>'
        )
        # Extract first paragraph.
        self.extract = self.content.split("</p>", 1)[0] + "</p>"
        # Create the index for the search.
        self.search_data = {
            "title": self.title,
            "url": self.url,
            "date": date_str,
            "content": do_striptags(self.content)
            .replace("\u00a0(cache)", " ")
            .replace("'", " "),
        }

    def __eq__(self, other):
        """Two pages are equal when they share the same URL."""
        # Mirror `__lt__`: let Python handle foreign types instead of raising
        # AttributeError on objects without a `url`.
        if not isinstance(other, Page):
            return NotImplemented
        return self.url == other.url

    def __lt__(self, other: "Page"):
        """Order pages chronologically."""
        if not isinstance(other, Page):
            return NotImplemented
        return self.date < other.date

    @staticmethod
    def all(source: Path, only_published=True, with_h2_anchors=True):
        """Retrieve all pages of `source`, most recent first.

        Each `*.md` file is rendered, then split into title, content and
        tags. As a side effect, `pages_by_url` is filled with every page,
        and `all_tags` / `pages_by_tags` with the published ones.

        Drafts are left out of the returned list unless `only_published` is
        false. `with_h2_anchors` selects the markdown renderer.
        """
        page_list = []
        md = markdown_with_h2_anchors if with_h2_anchors else markdown_with_img_sizes
        for file_path in sorted(each_file_from(source, pattern="*.md")):
            result = md.read(file_path)
            result = widont(result, html=True)
            # Extract (and remove) the title from the generated page.
            title, content = result.split("</h1>", 1)
            h1_opening_size = len("<h1>")
            title = title[h1_opening_size:]
            tags = []
            if "<nav><p>" in content:
                # Extract the tags from the generated page.
                content, tags_links = content.split("<nav><p>", 1)
                nav_closing_size = len("</p></nav>\n")
                tags_links = tags_links[:-nav_closing_size]
                try:
                    tags = sorted(
                        {
                            tag.strip().split("#", 1)[1]
                            for tag in tags_links.split("</a>")
                            if tag.strip()
                        },
                        key=slugify,
                    )
                except IndexError:
                    # It happens for old contents, parsed for the search index.
                    pass
            page = Page(title, content, tags, file_path.name)
            pages_by_url[page.url] = page
            if not page.is_draft:
                all_tags.update(tags)
                for tag in tags:
                    # `all()` may run several times: avoid duplicates.
                    if page not in pages_by_tags[tag]:
                        pages_by_tags[tag].append(page)
            if only_published and page.is_draft:
                continue
            page_list.append(page)
        return sorted(page_list, reverse=True)

    @property
    def is_draft(self):
        """Whether the page is dated after `PUBLICATION_BUFFER`."""
        return (
            datetime(year=self.date.year, month=self.date.month, day=self.date.day)
            > PUBLICATION_BUFFER
        )
-
-
@cli
def pages():
    """Build article pages.

    Every article (drafts included) is rendered to `<url>/index.html`, then
    the archives index of published articles is written to the 2023 root.
    """
    root_path = DAVID / "2023"
    # The article template does not depend on the page: fetch it once.
    article_template = environment.get_template("article_2020.html")
    for previous, page, next_ in neighborhood(
        reversed(Page.all(source=SOURCES_PATH, only_published=False)),
        first={
            "url": "/david/2022/",
            "title": "Publications 2022",
            "is_draft": False,
        },
    ):
        content = article_template.render(
            page=page, prev=previous, next=next_, slugify=slugify
        )
        target_path = Path(page.url[1:])
        target_path.mkdir(parents=True, exist_ok=True)
        # `write_text` closes the file; UTF-8 matches the locale forced at
        # import time.
        (target_path / "index.html").write_text(content, encoding="utf-8")
        if page.is_draft:
            print(f"Draft: {LOCAL_DOMAIN}{page.url} ({page.title})")

    def group_by_month_year(item):
        return item.date.strftime("%B %Y").title()

    template = environment.get_template("archives_2020.html")
    # Chronological order keeps pages of the same month adjacent, which
    # `groupby` requires.
    page_list = reversed(Page.all(source=SOURCES_PATH))
    tags = sorted((slugify(tag), tag, len(pages_by_tags[tag])) for tag in all_tags)
    content = template.render(
        page_list=groupby(page_list, key=group_by_month_year), tags=tags
    )
    (root_path / "index.html").write_text(content, encoding="utf-8")
-
-
@cli
def tags():
    """Build tags pages: one `index.html` per tag, in a slugified folder."""
    # Parse all pages to collect tags.
    Page.all(source=SOURCES_PATH, only_published=True)
    # The template is the same for every tag: fetch it once.
    template = environment.get_template("tag_2021.html")
    for tag in all_tags:
        content = template.render(
            page_list=sorted(pages_by_tags[tag], reverse=True),
            tag_name=tag,
        )
        target_path = DAVID / "2023" / slugify(tag)
        target_path.mkdir(parents=True, exist_ok=True)
        # `write_text` closes the file; UTF-8 matches the locale forced at
        # import time.
        (target_path / "index.html").write_text(content, encoding="utf-8")
-
-
@cli
def home():
    """Build the home page with last published items."""
    template = environment.get_template("profil.html")
    page_list = Page.all(source=SOURCES_PATH, only_published=True)
    # (slug, name, number of pages) triples, sorted by slug.
    tags = sorted((slugify(tag), tag, len(pages_by_tags[tag])) for tag in all_tags)
    content = template.render(page_list=page_list, tags=tags)
    # `write_text` closes the file; UTF-8 matches the locale forced at
    # import time.
    (DAVID / "index.html").write_text(content, encoding="utf-8")
-
-
@cli
def toot():
    """Pre-write the Mastodon message for the last published page."""
    page_list = Page.all(source=SOURCES_PATH, only_published=True)
    if not page_list:
        # Exit with an explicit message instead of an IndexError traceback.
        raise SystemExit("No published page to toot.")
    last_published = page_list[0]
    print(f"✍️ QUOTE? — {last_published.title}, {last_published.full_url}")
    print()
    print("#blog #larletfr #rss")
    print(" ".join(f"#{tag}" for tag in last_published.tags))
-
-
@cli
def search():
    """Build the static search page with custom index.

    The index gathers the published pages of every year, followed by the
    legacy blog and stream contents, and is embedded as JSON in the page.
    """
    template = environment.get_template("recherche.html")
    page_sources = (
        SOURCES_PATH,
        DAVID / "2022" / "_sources",
        DAVID / "2021" / "sources",
        DAVID / "2020",
    )
    # Same order as the index: most recent years first, then legacy content.
    page_list = []
    for source in page_sources:
        page_list += Page.all(
            source=source, only_published=True, with_h2_anchors=False
        )
    for year in ("2019", "2018", "2017"):
        page_list += BlogPage.all(source=DAVID / "blog" / year)
    for year in ("2019", "2018"):
        page_list += StreamPage.all(source=DAVID / "stream" / year)

    search_index = json.dumps([page.search_data for page in page_list], indent=2)
    content = template.render(search_index=search_index)
    # `write_text` closes the file; UTF-8 matches the locale forced at
    # import time.
    (DAVID / "recherche" / "index.html").write_text(content, encoding="utf-8")
-
-
@cli
def feed():
    """Generate a feed from the `NB_ITEMS_IN_FEED` last published items."""
    template = environment.get_template("feed.xml")
    # H2 anchors are not needed within the feed.
    page_list = Page.all(source=SOURCES_PATH, with_h2_anchors=False)
    content = template.render(
        page_list=page_list[:NB_ITEMS_IN_FEED],
        current_dt=TODAY.strftime(NORMALIZED_STRFTIME),
        BASE_URL=f"{DOMAIN}/david/",
    )
    # `write_text` closes the file; UTF-8 matches the locale forced at
    # import time.
    (DAVID / "log" / "index.xml").write_text(content, encoding="utf-8")
-
-
@wrap
def perf_wrapper():
    """Print the time elapsed between both sides of the `yield`.

    NOTE(review): presumably minicli runs the command at the `yield` point;
    confirm against its documentation.
    """
    started_at = perf_counter()
    yield
    duration = perf_counter() - started_at
    print(f"Done in {duration:.5f} seconds.")
-
-
# Below are legacy blog contents, still useful for search indexation.
@dataclass
class BlogPage:
    """A legacy blog post, only turned into a search index entry."""

    title: str
    content: str
    file_path: str
    date_str: str

    def __post_init__(self):
        """Derive the date, the URL and the search index entry."""
        self.date = datetime.strptime(self.date_str, "%Y-%m-%d").date()
        self.url = f"/{self.file_path}/"
        # Create the index for the search.
        # NOTE(review): as written, the last two replacements are no-ops;
        # possibly `&lt;` / `&gt;` were intended — confirm.
        self.search_data = {
            "title": self.title,
            "url": self.url,
            "date": self.date_str,
            "content": do_striptags(self.content)
            .replace("\u00a0(cache)", " ")
            .replace("'", " ")
            .replace("<", "<")
            .replace(">", ">"),
        }

    def __eq__(self, other):
        """Two blog pages are equal when they share the same URL."""
        # Mirror `__lt__`: let Python handle foreign types instead of raising
        # AttributeError on objects without a `url`.
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.url == other.url

    def __lt__(self, other: "BlogPage"):
        """Order blog pages chronologically."""
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.date < other.date

    @staticmethod
    def all(source: Path):
        """Retrieve all blog pages below `source`, most recent first.

        Each sub-folder of `source` is scanned for `*.md` files made of a
        metadata header, a blank line, then the markdown content.
        """
        page_list = []
        for folder in each_folder_from(source):
            for path in each_file_from(folder, pattern="*.md"):
                metadata, content = path.read_text().split("\n\n", 1)
                # The unpacking doubles as a check on the number of metadata
                # lines; only the title and the date are used.
                if "lang:" in metadata:
                    title, _slug, date_, _chapo, _lang = metadata.split("\n")
                else:
                    title, _slug, date_, _chapo = metadata.split("\n")
                title = title[len("title: ") :].strip()
                date_str = date_[len("date: ") :].strip()
                content = markdown_with_img_sizes(content)
                page = BlogPage(title, content, path.parent, date_str)
                page_list.append(page)
        return sorted(page_list, reverse=True)
-
-
@dataclass
class StreamPage:
    """A legacy stream entry, only turned into a search index entry."""

    title: str
    content: str
    file_path: str
    date_str: str

    def __post_init__(self):
        """Derive the date, the URL and the search index entry."""
        self.date = datetime.strptime(self.date_str, "%Y/%m/%d").date()
        self.url = f"/{self.file_path}/"
        # Create the index for the search.
        # NOTE(review): as written, the last two replacements are no-ops;
        # possibly `&lt;` / `&gt;` were intended — confirm.
        self.search_data = {
            "title": self.title,
            "url": self.url,
            "date": self.date.isoformat(),
            "content": do_striptags(self.content)
            .replace("\u00a0(cache)", " ")
            .replace("'", " ")
            .replace("<", "<")
            .replace(">", ">"),
        }

    def __eq__(self, other):
        """Two stream pages are equal when they share the same URL."""
        # Mirror `__lt__`: let Python handle foreign types instead of raising
        # AttributeError on objects without a `url`.
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.url == other.url

    def __lt__(self, other: "StreamPage"):
        """Order stream pages chronologically."""
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.date < other.date

    @staticmethod
    def all(source: Path):
        """Retrieve all stream pages below `source`, most recent first.

        Files are looked up two folder levels below `source`; the date is
        read from the end of the parent folder path (`YYYY/MM/DD`).
        """
        page_list = []
        for folder in each_folder_from(source):
            for subfolder in each_folder_from(folder):
                for path in each_file_from(subfolder, pattern="*.md"):
                    metadata, content = path.read_text().split("\n\n", 1)
                    # The unpacking doubles as a check on the number of
                    # metadata lines; only the title is used.
                    if "lang:" in metadata:
                        title, _lang = metadata.split("\n")
                    else:
                        title = metadata.strip()
                    title = title[len("title: ") :].strip()
                    date_str = str(path.parent)[-len("YYYY/MM/DD") :]
                    content = markdown_with_img_sizes(content)
                    page = StreamPage(title, content, path.parent, date_str)
                    page_list.append(page)
        return sorted(page_list, reverse=True)
-
-
# Script entry point: hand control over to minicli's `run()`.
# NOTE(review): presumably it dispatches to the `@cli` commands above —
# confirm against the minicli documentation.
if __name__ == "__main__":
    run()
|