Browse Source

Fallback on first body image

master
David Larlet 2 months ago
parent
commit
99c0307189
Signed by: David Larlet <david@larlet.fr> GPG Key ID: 3E2953A359E7E7BD

+ 25
- 14
cache.py View File

@@ -161,7 +161,7 @@ def fetch_metadata(title, url, description):
"""Fetch additional metadata."""
parsed_url = urlparse(url)
root_url = f"{parsed_url.scheme}://{parsed_url.netloc}/"
data = lassie.fetch(url)
data = lassie.fetch(url, all_images=True)
og_image = ""
favicon = ""
for image in data.get("images"):
@@ -182,6 +182,14 @@ def fetch_metadata(title, url, description):
elif ".ico" in favicon and ".ico" not in image_src:
favicon = image_src

if not og_image:
for image in data.get("images"):
if image_type == "body_image":
image_src = image.get("src")
if "favicon" not in image_src:
og_image = image_src
break

# Fallback on server's default.
if not favicon:
favico_url = f"{root_url}favicon.ico"
@@ -199,21 +207,24 @@ def fetch_metadata(title, url, description):
def metadata():
"""Fetch additional metadata for existing archives."""
for cache in Cache.all():
# That one is takin way too long.
# That one is taking way too long.
if cache.url.startswith("https://tw5.immateriel.fr"):
print("Skipping", cache.url)
print("Skipping (too long)", cache.url)
continue
if cache.og_image and cache.description and cache.favicon:
print("Skipping (all good)", cache.url)
continue
if cache.url.startswith(
(
"https://www.la-grange.net",
"https://gilest.org",
"https://vasilis.nl",
"https://www.danmcquillan.org",
"https://public-inbox.org",
)
) and (cache.og_image or cache.description or cache.favicon):
print("Skipping (known missing infos)", cache.url)
continue
# if cache.description or cache.url.startswith(
# (
# "https://www.la-grange.net",
# "https://gilest.org",
# "https://vasilis.nl",
# "https://www.danmcquillan.org",
# "https://public-inbox.org",
# )
# ):
# print("Skipping", cache.url)
# continue
print("Fetching metadata for", cache.url, cache.title)
og_image, description, favicon = fetch_metadata(
cache.title, cache.url, cache.description

+ 1
- 1
cache/2024/1d60fc5548a6fe61da80a4e16892fa0c/index.md View File

@@ -2,7 +2,7 @@ title: Deep Democracy - IAPOP
url: https://iapop.com/deep-democracy/
hash_url: 1d60fc5548a6fe61da80a4e16892fa0c
archive_date: 2024-01-31
og_image:
og_image: https://iapop.com/wp-content/uploads/2018/03/iapop_world_icon-blue-150px-tiny.png
description: Definition of Deep Democracy The concept of Deep Democracy was developed by Arnold Mindell. It is defined as an attitude and a principle. Attitude: Deep Democracy is an attitude that […]
favicon: https://iapop.com/wp-content/uploads/2018/03/iapop_world_icon-blue-150px-tiny.png


+ 1
- 1
cache/2024/1fe484434058e9c44d21bfebb0ddba31/index.md View File

@@ -2,7 +2,7 @@ title: ce que nous laissons
url: https://www.la-grange.net/2024/01/23/legacy
hash_url: 1fe484434058e9c44d21bfebb0ddba31
archive_date: 2024-01-28
og_image:
og_image: https://www.la-grange.net/2024/01/23/3152-ciel.jpg
description:
favicon: https://www.la-grange.net/favicon.ico


+ 1
- 1
cache/2024/790f724c45b26de460f9eeac04d48884/index.md View File

@@ -2,7 +2,7 @@ title: carnet d'archéologue
url: https://www.la-grange.net/2024/01/22/carnet
hash_url: 790f724c45b26de460f9eeac04d48884
archive_date: 2024-01-28
og_image:
og_image: https://www.la-grange.net/2024/01/20/3134-carnets.jpg
description:
favicon: https://www.la-grange.net/favicon.ico


+ 1
- 1
cache/2024/87c468a4eddabe5d2c28e902d7f17504/index.md View File

@@ -2,7 +2,7 @@ title: je ne sais pas pourquoi
url: https://www.la-grange.net/2024/01/11/pourquoi
hash_url: 87c468a4eddabe5d2c28e902d7f17504
archive_date: 2024-01-11
og_image:
og_image: https://www.la-grange.net/2024/01/09/3045-glace.jpg
description:
favicon: https://www.la-grange.net/favicon.ico


+ 1
- 1
cache/2024/956819385548bba6e768563b12edc2d6/index.md View File

@@ -2,7 +2,7 @@ title: herbe
url: https://www.la-grange.net/2024/01/24/herbe
hash_url: 956819385548bba6e768563b12edc2d6
archive_date: 2024-01-28
og_image:
og_image: https://www.la-grange.net/2024/01/24/3154-herbe.jpg
description:
favicon: https://www.la-grange.net/favicon.ico


+ 1
- 1
cache/2024/9bc04d41d25fc73391116d99b7259a3d/index.md View File

@@ -2,7 +2,7 @@ title: notes
url: https://www.la-grange.net/2023/07/10/notes-train
hash_url: 9bc04d41d25fc73391116d99b7259a3d
archive_date: 2024-01-07
og_image:
og_image: https://www.la-grange.net/2023/07/10/0797-batiment-visage.jpg
description:
favicon: https://www.la-grange.net/favicon.ico


+ 1
- 1
cache/2024/b1da1249f2db388d7e84d6ad23c2fc5d/index.md View File

@@ -2,7 +2,7 @@ title: Data Luddism
url: https://www.danmcquillan.org/dataluddism.html
hash_url: b1da1249f2db388d7e84d6ad23c2fc5d
archive_date: 2024-01-09
og_image:
og_image: https://www.danmcquillan.org/images/burnmill.jpg
description:
favicon:


+ 1
- 1
cache/2024/b692faaa55fd2775e957b20e833e9e5e/index.md View File

@@ -2,7 +2,7 @@ title: fraîchement
url: https://www.la-grange.net/2024/01/26/fraichement
hash_url: b692faaa55fd2775e957b20e833e9e5e
archive_date: 2024-01-28
og_image:
og_image: https://www.la-grange.net/2024/01/26/3159-chauffage.jpg
description:
favicon: https://www.la-grange.net/favicon.ico


+ 1
- 1
cache/2024/c4751e7c80b292e3533ee6b3e057b702/index.md View File

@@ -2,7 +2,7 @@ title: Manuel de survie de la femme dans la tech
url: https://www.duchess-france.fr/dossier/women%20in%20tech/alli%C3%A9s/2023/01/15/manuel-survie-femme-tech.html
hash_url: c4751e7c80b292e3533ee6b3e057b702
archive_date: 2024-01-21
og_image:
og_image: https://www.duchess-france.fr/assets/bandeau.jpeg
description: Je vois de plus en plus de femmes rejoindre l’informatique, et c’est une très bonne chose. Je vois aussi trop de femmes patir de sexisme ordinaire, se remettre en question encore et encore… et quitter le milieu au bout de quelques années. J’ai mis du temps à apprendre certaines choses.
favicon: https://www.duchess-france.fr/favicon.ico


+ 1
- 1
cache/2024/ce5fdc61fd66cdb9ce548fb543eba986/index.md View File

@@ -2,7 +2,7 @@ title: Unsigned Commits
url: https://blog.glyph.im/2024/01/unsigned-commits.html
hash_url: ce5fdc61fd66cdb9ce548fb543eba986
archive_date: 2024-01-25
og_image:
og_image: https://blog.glyph.im/images/back9.png
description: Deciphering Glyph, the blog of Glyph Lefkowitz.
favicon: https://blog.glyph.im/images/favicon.ico


+ 1
- 1
cache/2024/d236f33cf82727313d17cb23bf36a395/index.md View File

@@ -2,7 +2,7 @@ title: Reconsider your partnership with Brave
url: https://kagifeedback.org/d/2808-reconsider-your-partnership-with-brave/6
hash_url: d236f33cf82727313d17cb23bf36a395
archive_date: 2024-01-07
og_image:
og_image: https://kagifeedback.org/assets/favicon-bmwk4ltf.png
description: Brave, as you know, is led by Brendan Eich. s homophobia is so disgusting that he was forced to resign as the leader...
favicon: https://kagifeedback.org/assets/favicon-bmwk4ltf.png


+ 1
- 1
cache/2024/d75afc90a9d3c3b5a56b69446795fbb5/index.md View File

@@ -2,7 +2,7 @@ title: plaisir d'ébauche
url: https://www.la-grange.net/2024/01/06/ebauche
hash_url: d75afc90a9d3c3b5a56b69446795fbb5
archive_date: 2024-01-07
og_image:
og_image: https://www.la-grange.net/2024/01/06/3008-furikake.jpg
description:
favicon: https://www.la-grange.net/favicon.ico


+ 1
- 1
cache/2024/faa1d8cae94da6838ff9351e5df791ca/index.md View File

@@ -2,7 +2,7 @@ title: Make the indie web easier
url: https://gilest.org/indie-easy.html
hash_url: faa1d8cae94da6838ff9351e5df791ca
archive_date: 2024-01-09
og_image:
og_image: https://gilest.org/2024/dangerously-muddy.jpg
description:
favicon: https://gilest.org/favicon.ico


+ 1
- 1
cache/2024/ff566a58892db07815a327802fea66d3/index.md View File

@@ -2,7 +2,7 @@ title: In Loving Memory of Square Checkbox
url: https://tonsky.me/blog/checkbox/
hash_url: ff566a58892db07815a327802fea66d3
archive_date: 2024-01-28
og_image:
og_image: https://tonsky.me/blog/checkbox/checkbox@2x.png?t=1706539628
description: History of checkboxes and radio buttons in user interfaces
favicon: https://tonsky.me/i/favicon.png


Loading…
Cancel
Save