123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279 |
- #!/usr/bin/env python3
- import fnmatch
- import locale
- import os
- from dataclasses import dataclass
- from datetime import datetime, timedelta
- from html import escape
- from pathlib import Path
- from textwrap import dedent
- from time import perf_counter
-
- import mistune
- from jinja2 import Environment as Env
- from jinja2 import FileSystemLoader
- from minicli import cli, run, wrap
- from mistune.directives import DirectiveInclude
- from PIL import Image
- from slugify import slugify
-
# Dates rendered from the Jinja2 templates rely on the French locale.
locale.setlocale(locale.LC_ALL, "fr_FR.UTF-8")

# Filesystem layout, relative to the current working directory.
HERE = Path(".")
DAVID = HERE.joinpath("david")
STATIC = HERE.joinpath("..", "larlet-fr-static")

# Public domain and local development domain.
DOMAIN = "https://larlet.fr"
LOCAL_DOMAIN = "http://larlet.test:3579"

# Publication time is hardcoded at noon, Paris timezone (fixed +01:00 offset).
NORMALIZED_STRFTIME = "%Y-%m-%dT12:00:00+01:00"
# Reference date used to tell drafts from published pages: now plus six hours.
TODAY = datetime.today() + timedelta(hours=6)
-
-
class MarkParser(mistune.InlineParser):
    """Parses `==foo==` as `<mark>foo</mark>`.

    Adds an inline rule named ``mark`` on top of the rules already declared
    by `mistune.InlineParser`. The token it produces is named ``mark`` too,
    so the renderer in use must expose a matching ``mark`` method.
    """

    # An opening `==` not followed by whitespace or `*`, a body made of
    # escaped backslashes/asterisks or any non-asterisk character, and the
    # same `==` delimiter to close (back-reference `\1`).
    MARK = (
        r"(\={2})(?=[^\s*])("
        r"(?:\\[\\*]|[^*])*"
        r"(?:" + mistune.InlineParser.ESCAPE + r"|[^\s*]))\1"
    )

    RULE_NAMES = mistune.InlineParser.RULE_NAMES + ("mark",)

    def parse_mark(self, m, state):
        """Return the ``("mark", rendered_text)`` token for the match `m`.

        Only the inner text (group 2) is rendered; the `==` delimiter
        captured by group 1 carries no information and is ignored.
        """
        return "mark", self.render(m.group(2), state)
-
-
class MarkRenderer(mistune.HTMLRenderer):
    """HTML renderer for the ``mark`` token emitted by `MarkParser`."""

    def mark(self, text):
        """Wrap the already rendered `text` in a `<mark>` element."""
        return "".join(("<mark>", text, "</mark>"))
-
-
class BlockquoteLanguageRenderer(MarkRenderer):
    """Sets the English language attribute for blockquotes with `[en]` prefix."""

    def _get_language(self, text):
        """Return `(language, text)` for the rendered blockquote body.

        When `text` starts with `<p>[en] `, the language is `"en"` and the
        marker is removed from the returned text. Otherwise the language is
        `None` and `text` is returned untouched.
        """
        marker = "<p>[en] "
        if not text.startswith(marker):
            return None, text
        # Strip the leading marker only: the same characters appearing later
        # in the quote are regular content and must be preserved.
        return "en", "<p>" + text[len(marker):]

    def block_quote(self, text):
        """Render a blockquote, adding a `lang` attribute when detected."""
        language, text = self._get_language(text)
        lang_attribute = f' lang="{language}"' if language else ""
        return f"\n<blockquote{lang_attribute}>\n{text}</blockquote>\n"
-
-
class ImgsWithSizesRenderer(BlockquoteLanguageRenderer):
    """Renders images as <figure>s and add sizes (useful for lazy loading)."""

    def paragraph(self, text):
        """Render a paragraph, except when it only wraps a figure."""
        # In case of a figure, we do not want the (non-standard) paragraph.
        if text.strip().startswith("<figure>"):
            return text
        return f"<p>{text}</p>\n"

    def image(self, src, alt="", title=None):
        """Render an image as a `<figure>` carrying its pixel dimensions.

        The first character of `src` is dropped (presumably a leading `/`)
        and the remainder is resolved against `STATIC` to read the size of
        the file. `title` becomes the caption; a missing title gives an
        empty caption instead of the literal text `None`.

        Raises whatever `PIL.Image.open` raises when the file is missing or
        is not a readable image.
        """
        full_path = STATIC / Path(src[1:])
        # The context manager closes the underlying file once the size is read.
        with Image.open(full_path) as image:
            width, height = image.size
        caption = "" if title is None else title
        return dedent(
            f"""\
            <figure>
                <img src="{src}"
                    alt="{alt}"
                    loading="lazy" width="{width}" height="{height}" />
                <figcaption>{caption}</figcaption>
            </figure>
            """
        )
-
-
class H2AnchorsRenderer(ImgsWithSizesRenderer):
    """Renderer adding an `id` and a self-link to level 2 headings."""

    def heading(self, text, level):
        """Render a heading; only `<h2>` gets the slug-based anchor."""
        if level != 2:
            return super().heading(text, level)
        anchor = slugify(text)
        link = f'<a href="#{anchor}" title="Ancre vers cette partie">#</a>'
        return f'<h2 id="{anchor}">{text} {link}</h2>'
-
-
def _build_markdown(renderer_class):
    """Return a `mistune.Markdown` wired with `renderer_class`.

    Two distinct renderer instances are created (one for the block level,
    one handed to the `MarkParser` inline parser), both with escaping off.
    """
    return mistune.Markdown(
        renderer=renderer_class(escape=False),
        inline=MarkParser(renderer_class(escape=False)),
        plugins=[DirectiveInclude()],
    )


# Regular pages: each H2 heading gets a hash/link.
markdown_with_h2_anchors = _build_markdown(H2AnchorsRenderer)
# Feed articles: same rendering, anchors are not needed there.
markdown_with_img_sizes = _build_markdown(ImgsWithSizesRenderer)

# Jinja2 looks templates up in the `templates` folder of the site.
environment = Env(loader=FileSystemLoader(str(DAVID.joinpath("templates"))))
-
-
def neighborhood(iterable, first=None, last=None):
    """
    Yield the (previous, current, next) items given an iterable.

    You can specify a `first` and/or `last` item for bounds: `first` is used
    as the previous item of the first element and `last` as the next item of
    the last element (both default to `None`).

    An empty iterable yields nothing.
    """
    iterator = iter(iterable)
    previous = first
    try:
        current = next(iterator)
    except StopIteration:
        # A StopIteration escaping a generator body is turned into a
        # RuntimeError (PEP 479), hence the explicit return when empty.
        return
    for next_ in iterator:
        yield (previous, current, next_)
        previous = current
        current = next_
    yield (previous, current, last)
-
-
def each_markdown_from(source_dir, file_name="*.md"):
    """Yield the path of each entry of `source_dir` matching `file_name`.

    Only the direct content of `source_dir` is listed (no recursion) and
    each yielded path is `source_dir` joined with the matching entry name.
    """
    matching_names = fnmatch.filter(os.listdir(source_dir), file_name)
    yield from (os.path.join(source_dir, name) for name in matching_names)
-
-
@dataclass
class Page:
    """A dated page built from a markdown file named `YYYY/MM-DD.md`."""

    title: str
    content: str
    file_path: str
    lang: str = "fr"

    def __post_init__(self):
        """Compute the URLs, dates and escaped variants of the page."""
        # The date is read from the end of the path: `…YYYY/MM-DD.md`.
        extension_size = len(".md")
        dated_size = len("YYYY/MM-DD") + extension_size
        date_str = self.file_path[-dated_size:-extension_size].replace("-", "/")
        self.url = f"/david/{date_str}/"
        self.date = datetime.strptime(date_str, "%Y/%m/%d").date()
        self.full_url = f"{DOMAIN}{self.url}"
        self.normalized_date = self.date.strftime(NORMALIZED_STRFTIME)
        self.escaped_title = escape(self.title)
        # Links, sources and in-page anchors are made absolute before the
        # content gets escaped.
        absolute_content = self.content.replace('href="/', f'href="{DOMAIN}/')
        absolute_content = absolute_content.replace('src="/', f'src="{DOMAIN}/')
        absolute_content = absolute_content.replace(
            'href="#', f'href="{self.full_url}#'
        )
        reply_link = '<hr/><p><a href="mailto:david@larlet.fr">Réagir ?</a></p>'
        self.escaped_content = escape(absolute_content + reply_link)
        # Extract first paragraph.
        first_paragraph = self.content.split("</p>", 1)[0]
        self.extract = first_paragraph + "</p>"

    def __lt__(self, other: "Page"):
        """Order pages by date; other types are not comparable."""
        if isinstance(other, Page):
            return self.date < other.date
        return NotImplemented

    @staticmethod
    def all(source: Path, only_published=True, with_h2_anchors=True):
        """Retrieve all pages of `source`, most recent first.

        Drafts are skipped unless `only_published` is false, and
        `with_h2_anchors` selects which markdown instance renders the files.
        """
        if with_h2_anchors:
            md = markdown_with_h2_anchors
        else:
            md = markdown_with_img_sizes
        h1_opening = "<h1>"
        collected = []
        for file_path in each_markdown_from(source):
            # The title is extracted from (and removed of) the rendered page.
            heading, content = md.read(file_path).split("</h1>", 1)
            page = Page(heading[len(h1_opening):], content, file_path)
            if not only_published or not page.is_draft:
                collected.append(page)
        collected.sort(reverse=True)
        return collected

    @property
    def is_draft(self):
        """Tell whether the page date (at midnight) is later than `TODAY`."""
        midnight = datetime.combine(self.date, datetime.min.time())
        return midnight > TODAY
-
-
@cli
def orphans():
    """Print out fragments not linked to any page."""
    include_directive = ".. include:: fragments/"
    fragments_dir = DAVID / "2020" / "fragments"
    linked_fragments = set()
    for file_path in each_markdown_from(DAVID / "2020"):
        # The context manager closes each page once it has been scanned.
        with open(file_path, encoding="utf-8") as page_file:
            for line in page_file:
                if line.startswith(include_directive):
                    # Strip the line ending only: the last line of a file may
                    # not have one and must not lose its final character.
                    fragment = line[len(include_directive):].rstrip("\r\n")
                    linked_fragments.add(fragment)
    all_fragments = {
        os.path.basename(file_path)
        for file_path in each_markdown_from(fragments_dir)
    }
    # Sorted to get the same report from one run to the next.
    for fragment_filename in sorted(all_fragments - linked_fragments):
        # Prepending path for easy command+click from fish.
        print(f"Orphan: {fragments_dir / fragment_filename}")
-
-
@cli
def pages():
    """Build the aggregations from fragments."""
    root_path = DAVID / "2020"
    # The markdown sources are parsed once, drafts included; the archives
    # below only keep the published subset of that same list.
    all_pages = Page.all(source=root_path, only_published=False)
    article_template = environment.get_template("article_2020.html")
    for previous, page, next_ in neighborhood(
        reversed(all_pages),
        first={
            "url": "/david/stream/",
            "title": "Streams 2009-2019",
            "is_draft": False,
        },
    ):
        content = article_template.render(page=page, prev=previous, next=next_)
        # `page.url` is absolute (`/david/…`): drop the leading slash to
        # write relatively to the current directory.
        target_path = Path(page.url[1:])
        target_path.mkdir(parents=True, exist_ok=True)
        # `write_text` closes the file as soon as the content is written.
        (target_path / "index.html").write_text(content, encoding="utf-8")
        if page.is_draft:
            print(f"Draft: {LOCAL_DOMAIN}{page.url} ({page.title})")
    archives_template = environment.get_template("archives_2020.html")
    published_pages = [page for page in all_pages if not page.is_draft]
    content = archives_template.render(page_list=published_pages)
    (root_path / "index.html").write_text(content, encoding="utf-8")
-
-
@cli
def home():
    """Build the home page with last published items."""
    template = environment.get_template("profil.html")
    content = template.render(page_list=Page.all(source=DAVID / "2020"))
    # `write_text` closes the file as soon as the content is written.
    (DAVID / "index.html").write_text(content, encoding="utf-8")
-
-
@cli
def feed():
    """Generate a feed from last published items."""
    template = environment.get_template("feed.xml")
    content = template.render(
        # Anchors on H2 headings are not wanted within the feed.
        page_list=Page.all(source=DAVID / "2020", with_h2_anchors=False),
        current_dt=TODAY.strftime(NORMALIZED_STRFTIME),
        BASE_URL=f"{DOMAIN}/david/",
    )
    # `write_text` closes the file as soon as the content is written.
    (DAVID / "log" / "index.xml").write_text(content, encoding="utf-8")
-
-
@wrap
def perf_wrapper():
    # Measure the time spent between entering and resuming after the `yield`
    # (presumably the duration of the command wrapped by minicli — confirm).
    started_at = perf_counter()
    yield
    print(f"Done in {perf_counter() - started_at:.5f} seconds.")
-
-
# Hand over to minicli only when the file is executed as a script.
if __name__ == "__main__":
    run()
|