|
|
|
|
|
|
|
|
#!/usr/bin/env python3
import fnmatch
import json
import locale
import os
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
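# Assumed context: `widont`, `do_striptags`, `markdown_with_h2_anchors`,
# `markdown_with_img_sizes` and the Jinja2 `template` of the search page are
# used below, but their definitions are missing from this copy. The names
# defined here are inferred from usage.
HERE = Path(".").resolve()
DAVID = HERE / "david"  # Assumed layout: contents live under `david/`.
pages_by_url = {}  # Filled by Page.all() below.
all_tags = set()  # Updated with the tags of published pages.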
|
|
|
|
|
|
|
|
# Reconstructed helper: only the final `yield` survived in this copy; the
# rest follows the classic previous/current/next sliding-window recipe.
def neighborhood(iterable, first=None, last=None):
    """Yield the (previous, current, next) items given an iterable.

    `first` and `last` are used as the out-of-bounds neighbours.
    """
    iterator = iter(iterable)
    previous = first
    current = next(iterator)  # Raises StopIteration on an empty iterable.
    for next_ in iterator:
        yield (previous, current, next_)
        previous = current
        current = next_
    yield (previous, current, last)
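# Hypothetical usage sketch (`render` is invented): walk a sorted page list
# with both neighbours at hand, e.g. for previous/next navigation links.
#
#   for previous, current, next_ in neighborhood(page_list):
#       render(current, prev_page=previous, next_page=next_)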
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def each_markdown_from(source_dir, file_name="*.md"):
    """Walk across the `source_dir` and yield the Markdown file names."""
    for filename in fnmatch.filter(os.listdir(source_dir), file_name):
        yield filename
|
|
|
|
|
|
|
|
def each_file_from(source_dir, pattern="*", exclude=None):
    """Walk across the `source_dir` and yield the `pattern` file paths."""
    for path in _each_path_from(source_dir, pattern=pattern, exclude=exclude):
        if path.is_file():
            yield path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def each_folder_from(source_dir, exclude=None):
    """Walk across the `source_dir` and yield the folder paths."""
    for path in _each_path_from(source_dir, exclude=exclude):
        if path.is_dir():
            yield path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _each_path_from(source_dir, pattern="*", exclude=None):
    """Yield the sorted paths matching `pattern`, skipping excluded names."""
    for path in sorted(Path(source_dir).glob(pattern)):
        if exclude is not None and path.name in exclude:
            continue
        yield path
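# Hedged usage sketch (paths and excludes invented): list the Markdown
# sources of a year while skipping a special file.
#
#   for path in each_file_from(DAVID / "2020", pattern="*.md",
#                              exclude=("index.md",)):
#       print(path.name)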
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class Page:
    # Fields inferred from the `Page(title, content, tags, file_path.name)`
    # call below. The original `__post_init__` (setting `url`, `is_draft` and
    # `search_data`, on the model of BlogPage/StreamPage) did not survive in
    # this copy.
    title: str
    content: str
    tags: list
    file_name: str

    @staticmethod
    def all(source: Path, only_published=True, with_h2_anchors=True):
        """Retrieve all pages sorted by desc."""
        page_list = []
        md = markdown_with_h2_anchors if with_h2_anchors else markdown_with_img_sizes
        for file_path in sorted(each_file_from(source, pattern="*.md")):
            result = md.read(file_path)
            result = widont(result, html=True)
            # Extract (and remove) the title from the generated page.
            title, content = result.split("</h1>", 1)
            title = title[len("<h1>") :]  # Assumed: drop the opening tag.
            tags = []
            try:
                # The tag-extraction code did not survive in this copy; only
                # its error handling below did.
                pass
            except IndexError:
                # It happens for old contents, parsed for the search index.
                pass
            page = Page(title, content, tags, file_path.name)
            pages_by_url[page.url] = page
            if not page.is_draft:
                all_tags.update(tags)
            if only_published and page.is_draft:
                # Inferred from the `only_published` flag at the call sites.
                continue
            page_list.append(page)
        return sorted(page_list, reverse=True)
|
|
|
|
|
def run():
    start = datetime.now()  # Assumed timer; only `elapsed` survives below.
    # The 2022 and 2021 calls are reconstructed on the model of the
    # surviving 2020 one.
    page_list_2022 = Page.all(
        source=DAVID / "2022", only_published=True, with_h2_anchors=False
    )
    page_list_2021 = Page.all(
        source=DAVID / "2021", only_published=True, with_h2_anchors=False
    )
    page_list_2020 = Page.all(
        source=DAVID / "2020", only_published=True, with_h2_anchors=False
    )
    # Legacy contents, only needed for the search index.
    blog_page_list_2019 = BlogPage.all(source=DAVID / "blog" / "2019")
    blog_page_list_2018 = BlogPage.all(source=DAVID / "blog" / "2018")
    blog_page_list_2017 = BlogPage.all(source=DAVID / "blog" / "2017")
    stream_page_list_2019 = StreamPage.all(source=DAVID / "stream" / "2019")
    stream_page_list_2018 = StreamPage.all(source=DAVID / "stream" / "2018")
    page_list = (
        page_list_2022
        + page_list_2021
        + page_list_2020
        + blog_page_list_2019
        + blog_page_list_2018
        + blog_page_list_2017
        + stream_page_list_2019
        + stream_page_list_2018
    )
    search_index = json.dumps([page.search_data for page in page_list], indent=2)
    content = template.render(search_index=search_index)
    # `write_text` closes the file, unlike the bare `open(...).write(...)`.
    (DAVID / "recherche" / "index.html").write_text(content)
    elapsed = (datetime.now() - start).total_seconds()
    print(f"Done in {elapsed:.5f} seconds.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Below are legacy blog contents, still useful for search indexation. |
|
|
|
|
|
@dataclass
class BlogPage:
    title: str
    content: str
    file_path: str
    date_str: str

    def __post_init__(self):
        self.date = datetime.strptime(self.date_str, "%Y-%m-%d").date()
        self.url = f"/{self.file_path}/"
        # Create the index for the search.
        self.search_data = {
            "title": self.title,
            "url": self.url,
            "date": self.date_str,
            "content": do_striptags(self.content)
            .replace("\u00a0(cache)", " ")
            .replace("'", " ")
            .replace("&lt;", "<")
            .replace("&gt;", ">"),
        }

    def __eq__(self, other):
        return self.url == other.url

    def __lt__(self, other: "BlogPage"):
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.date < other.date

    @staticmethod
    def all(source: Path):
        """Retrieve all pages sorted by desc."""
        page_list = []
        for folder in each_folder_from(source):
            for path in each_file_from(folder, pattern="*.md"):
                metadata, content = path.read_text().split("\n\n", 1)
                if "lang:" in metadata:
                    title, slug, date_, chapo, lang = metadata.split("\n")
                else:
                    title, slug, date_, chapo = metadata.split("\n")
                title = title[len("title: ") :].strip()
                date_str = date_[len("date: ") :].strip()
                content = markdown_with_img_sizes(content)
                page = BlogPage(title, content, path.parent, date_str)
                page_list.append(page)
        return sorted(page_list, reverse=True)
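# A hedged sketch of the metadata block BlogPage.all() expects at the top of
# each post, given the parsing above (values invented; `lang:` is optional):
#
#   title: Un titre d'article
#   slug: un-titre-d-article
#   date: 2019-06-23
#   chapo: Une introduction d'article.
#
# A blank line then separates this header from the Markdown body.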
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class StreamPage:
    title: str
    content: str
    file_path: str
    date_str: str

    def __post_init__(self):
        self.date = datetime.strptime(self.date_str, "%Y/%m/%d").date()
        self.url = f"/{self.file_path}/"
        # Create the index for the search.
        self.search_data = {
            "title": self.title,
            "url": self.url,
            "date": self.date.isoformat(),
            "content": do_striptags(self.content)
            .replace("\u00a0(cache)", " ")
            .replace("'", " ")
            .replace("&lt;", "<")
            .replace("&gt;", ">"),
        }

    def __eq__(self, other):
        return self.url == other.url

    def __lt__(self, other: "StreamPage"):
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.date < other.date

    @staticmethod
    def all(source: Path):
        """Retrieve all pages sorted by desc."""
        page_list = []
        for folder in each_folder_from(source):
            for subfolder in each_folder_from(folder):
                for path in each_file_from(subfolder, pattern="*.md"):
                    metadata, content = path.read_text().split("\n\n", 1)
                    if "lang:" in metadata:
                        title, lang = metadata.split("\n")
                    else:
                        title = metadata.strip()
                    title = title[len("title: ") :].strip()
                    date_str = str(path.parent)[-len("YYYY/MM/DD") :]
                    content = markdown_with_img_sizes(content)
                    page = StreamPage(title, content, path.parent, date_str)
                    page_list.append(page)
        return sorted(page_list, reverse=True)
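# Hedged sketch of the tree StreamPage.all() walks (dates invented): given
#
#   david/stream/2019/06/23/index.md
#
# `path.parent` is `david/stream/2019/06/23`, and its last
# len("YYYY/MM/DD") == 10 characters give the date string "2019/06/23".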
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    run()