@@ -161,7 +161,7 @@ def fetch_metadata(title, url, description): | |||
"""Fetch additional metadata.""" | |||
parsed_url = urlparse(url) | |||
root_url = f"{parsed_url.scheme}://{parsed_url.netloc}/" | |||
data = lassie.fetch(url) | |||
data = lassie.fetch(url, all_images=True) | |||
og_image = "" | |||
favicon = "" | |||
for image in data.get("images"): | |||
@@ -182,6 +182,14 @@ def fetch_metadata(title, url, description): | |||
elif ".ico" in favicon and ".ico" not in image_src: | |||
favicon = image_src | |||
if not og_image: | |||
for image in data.get("images"): | |||
if image_type == "body_image": | |||
image_src = image.get("src") | |||
if "favicon" not in image_src: | |||
og_image = image_src | |||
break | |||
# Fallback on server's default. | |||
if not favicon: | |||
favico_url = f"{root_url}favicon.ico" | |||
@@ -199,21 +207,24 @@ def fetch_metadata(title, url, description): | |||
def metadata(): | |||
"""Fetch additional metadata for existing archives.""" | |||
for cache in Cache.all(): | |||
# That one is takin way too long. | |||
# That one is taking way too long. | |||
if cache.url.startswith("https://tw5.immateriel.fr"): | |||
print("Skipping", cache.url) | |||
print("Skipping (too long)", cache.url) | |||
continue | |||
if cache.og_image and cache.description and cache.favicon: | |||
print("Skipping (all good)", cache.url) | |||
continue | |||
if cache.url.startswith( | |||
( | |||
"https://www.la-grange.net", | |||
"https://gilest.org", | |||
"https://vasilis.nl", | |||
"https://www.danmcquillan.org", | |||
"https://public-inbox.org", | |||
) | |||
) and (cache.og_image or cache.description or cache.favicon): | |||
print("Skipping (known missing infos)", cache.url) | |||
continue | |||
# if cache.description or cache.url.startswith( | |||
# ( | |||
# "https://www.la-grange.net", | |||
# "https://gilest.org", | |||
# "https://vasilis.nl", | |||
# "https://www.danmcquillan.org", | |||
# "https://public-inbox.org", | |||
# ) | |||
# ): | |||
# print("Skipping", cache.url) | |||
# continue | |||
print("Fetching metadata for", cache.url, cache.title) | |||
og_image, description, favicon = fetch_metadata( | |||
cache.title, cache.url, cache.description |
@@ -2,7 +2,7 @@ title: Deep Democracy - IAPOP | |||
url: https://iapop.com/deep-democracy/ | |||
hash_url: 1d60fc5548a6fe61da80a4e16892fa0c | |||
archive_date: 2024-01-31 | |||
og_image: | |||
og_image: https://iapop.com/wp-content/uploads/2018/03/iapop_world_icon-blue-150px-tiny.png | |||
description: Definition of Deep Democracy The concept of Deep Democracy was developed by Arnold Mindell. It is defined as an attitude and a principle. Attitude: Deep Democracy is an attitude that […] | |||
favicon: https://iapop.com/wp-content/uploads/2018/03/iapop_world_icon-blue-150px-tiny.png | |||
@@ -2,7 +2,7 @@ title: ce que nous laissons | |||
url: https://www.la-grange.net/2024/01/23/legacy | |||
hash_url: 1fe484434058e9c44d21bfebb0ddba31 | |||
archive_date: 2024-01-28 | |||
og_image: | |||
og_image: https://www.la-grange.net/2024/01/23/3152-ciel.jpg | |||
description: | |||
favicon: https://www.la-grange.net/favicon.ico | |||
@@ -2,7 +2,7 @@ title: carnet d'archéologue | |||
url: https://www.la-grange.net/2024/01/22/carnet | |||
hash_url: 790f724c45b26de460f9eeac04d48884 | |||
archive_date: 2024-01-28 | |||
og_image: | |||
og_image: https://www.la-grange.net/2024/01/20/3134-carnets.jpg | |||
description: | |||
favicon: https://www.la-grange.net/favicon.ico | |||
@@ -2,7 +2,7 @@ title: je ne sais pas pourquoi | |||
url: https://www.la-grange.net/2024/01/11/pourquoi | |||
hash_url: 87c468a4eddabe5d2c28e902d7f17504 | |||
archive_date: 2024-01-11 | |||
og_image: | |||
og_image: https://www.la-grange.net/2024/01/09/3045-glace.jpg | |||
description: | |||
favicon: https://www.la-grange.net/favicon.ico | |||
@@ -2,7 +2,7 @@ title: herbe | |||
url: https://www.la-grange.net/2024/01/24/herbe | |||
hash_url: 956819385548bba6e768563b12edc2d6 | |||
archive_date: 2024-01-28 | |||
og_image: | |||
og_image: https://www.la-grange.net/2024/01/24/3154-herbe.jpg | |||
description: | |||
favicon: https://www.la-grange.net/favicon.ico | |||
@@ -2,7 +2,7 @@ title: notes | |||
url: https://www.la-grange.net/2023/07/10/notes-train | |||
hash_url: 9bc04d41d25fc73391116d99b7259a3d | |||
archive_date: 2024-01-07 | |||
og_image: | |||
og_image: https://www.la-grange.net/2023/07/10/0797-batiment-visage.jpg | |||
description: | |||
favicon: https://www.la-grange.net/favicon.ico | |||
@@ -2,7 +2,7 @@ title: Data Luddism | |||
url: https://www.danmcquillan.org/dataluddism.html | |||
hash_url: b1da1249f2db388d7e84d6ad23c2fc5d | |||
archive_date: 2024-01-09 | |||
og_image: | |||
og_image: https://www.danmcquillan.org/images/burnmill.jpg | |||
description: | |||
favicon: | |||
@@ -2,7 +2,7 @@ title: fraîchement | |||
url: https://www.la-grange.net/2024/01/26/fraichement | |||
hash_url: b692faaa55fd2775e957b20e833e9e5e | |||
archive_date: 2024-01-28 | |||
og_image: | |||
og_image: https://www.la-grange.net/2024/01/26/3159-chauffage.jpg | |||
description: | |||
favicon: https://www.la-grange.net/favicon.ico | |||
@@ -2,7 +2,7 @@ title: Manuel de survie de la femme dans la tech | |||
url: https://www.duchess-france.fr/dossier/women%20in%20tech/alli%C3%A9s/2023/01/15/manuel-survie-femme-tech.html | |||
hash_url: c4751e7c80b292e3533ee6b3e057b702 | |||
archive_date: 2024-01-21 | |||
og_image: | |||
og_image: https://www.duchess-france.fr/assets/bandeau.jpeg | |||
description: Je vois de plus en plus de femmes rejoindre l’informatique, et c’est une très bonne chose. Je vois aussi trop de femmes patir de sexisme ordinaire, se remettre en question encore et encore… et quitter le milieu au bout de quelques années. J’ai mis du temps à apprendre certaines choses. | |||
favicon: https://www.duchess-france.fr/favicon.ico | |||
@@ -2,7 +2,7 @@ title: Unsigned Commits | |||
url: https://blog.glyph.im/2024/01/unsigned-commits.html | |||
hash_url: ce5fdc61fd66cdb9ce548fb543eba986 | |||
archive_date: 2024-01-25 | |||
og_image: | |||
og_image: https://blog.glyph.im/images/back9.png | |||
description: Deciphering Glyph, the blog of Glyph Lefkowitz. | |||
favicon: https://blog.glyph.im/images/favicon.ico | |||
@@ -2,7 +2,7 @@ title: Reconsider your partnership with Brave | |||
url: https://kagifeedback.org/d/2808-reconsider-your-partnership-with-brave/6 | |||
hash_url: d236f33cf82727313d17cb23bf36a395 | |||
archive_date: 2024-01-07 | |||
og_image: | |||
og_image: https://kagifeedback.org/assets/favicon-bmwk4ltf.png | |||
description: Brave, as you know, is led by Brendan Eich. s homophobia is so disgusting that he was forced to resign as the leader... | |||
favicon: https://kagifeedback.org/assets/favicon-bmwk4ltf.png | |||
@@ -2,7 +2,7 @@ title: plaisir d'ébauche | |||
url: https://www.la-grange.net/2024/01/06/ebauche | |||
hash_url: d75afc90a9d3c3b5a56b69446795fbb5 | |||
archive_date: 2024-01-07 | |||
og_image: | |||
og_image: https://www.la-grange.net/2024/01/06/3008-furikake.jpg | |||
description: | |||
favicon: https://www.la-grange.net/favicon.ico | |||
@@ -2,7 +2,7 @@ title: Make the indie web easier | |||
url: https://gilest.org/indie-easy.html | |||
hash_url: faa1d8cae94da6838ff9351e5df791ca | |||
archive_date: 2024-01-09 | |||
og_image: | |||
og_image: https://gilest.org/2024/dangerously-muddy.jpg | |||
description: | |||
favicon: https://gilest.org/favicon.ico | |||
@@ -2,7 +2,7 @@ title: In Loving Memory of Square Checkbox | |||
url: https://tonsky.me/blog/checkbox/ | |||
hash_url: ff566a58892db07815a327802fea66d3 | |||
archive_date: 2024-01-28 | |||
og_image: | |||
og_image: https://tonsky.me/blog/checkbox/checkbox@2x.png?t=1706539628 | |||
description: History of checkboxes and radio buttons in user interfaces | |||
favicon: https://tonsky.me/i/favicon.png | |||