Browse Source

Fallback on first body image

master
David Larlet 3 months ago
parent
commit
99c0307189
Signed by: David Larlet <david@larlet.fr> GPG Key ID: 3E2953A359E7E7BD

+ 25
- 14
cache.py View File

"""Fetch additional metadata.""" """Fetch additional metadata."""
parsed_url = urlparse(url) parsed_url = urlparse(url)
root_url = f"{parsed_url.scheme}://{parsed_url.netloc}/" root_url = f"{parsed_url.scheme}://{parsed_url.netloc}/"
data = lassie.fetch(url)
data = lassie.fetch(url, all_images=True)
og_image = "" og_image = ""
favicon = "" favicon = ""
for image in data.get("images"): for image in data.get("images"):
elif ".ico" in favicon and ".ico" not in image_src: elif ".ico" in favicon and ".ico" not in image_src:
favicon = image_src favicon = image_src


if not og_image:
for image in data.get("images"):
if image_type == "body_image":
image_src = image.get("src")
if "favicon" not in image_src:
og_image = image_src
break

# Fallback on server's default. # Fallback on server's default.
if not favicon: if not favicon:
favico_url = f"{root_url}favicon.ico" favico_url = f"{root_url}favicon.ico"
def metadata(): def metadata():
"""Fetch additional metadata for existing archives.""" """Fetch additional metadata for existing archives."""
for cache in Cache.all(): for cache in Cache.all():
# That one is takin way too long.
# That one is taking way too long.
if cache.url.startswith("https://tw5.immateriel.fr"): if cache.url.startswith("https://tw5.immateriel.fr"):
print("Skipping", cache.url)
print("Skipping (too long)", cache.url)
continue
if cache.og_image and cache.description and cache.favicon:
print("Skipping (all good)", cache.url)
continue
if cache.url.startswith(
(
"https://www.la-grange.net",
"https://gilest.org",
"https://vasilis.nl",
"https://www.danmcquillan.org",
"https://public-inbox.org",
)
) and (cache.og_image or cache.description or cache.favicon):
print("Skipping (known missing infos)", cache.url)
continue continue
# if cache.description or cache.url.startswith(
# (
# "https://www.la-grange.net",
# "https://gilest.org",
# "https://vasilis.nl",
# "https://www.danmcquillan.org",
# "https://public-inbox.org",
# )
# ):
# print("Skipping", cache.url)
# continue
print("Fetching metadata for", cache.url, cache.title) print("Fetching metadata for", cache.url, cache.title)
og_image, description, favicon = fetch_metadata( og_image, description, favicon = fetch_metadata(
cache.title, cache.url, cache.description cache.title, cache.url, cache.description

+ 1
- 1
cache/2024/1d60fc5548a6fe61da80a4e16892fa0c/index.md View File

url: https://iapop.com/deep-democracy/ url: https://iapop.com/deep-democracy/
hash_url: 1d60fc5548a6fe61da80a4e16892fa0c hash_url: 1d60fc5548a6fe61da80a4e16892fa0c
archive_date: 2024-01-31 archive_date: 2024-01-31
og_image:
og_image: https://iapop.com/wp-content/uploads/2018/03/iapop_world_icon-blue-150px-tiny.png
description: Definition of Deep Democracy The concept of Deep Democracy was developed by Arnold Mindell. It is defined as an attitude and a principle. Attitude: Deep Democracy is an attitude that […] description: Definition of Deep Democracy The concept of Deep Democracy was developed by Arnold Mindell. It is defined as an attitude and a principle. Attitude: Deep Democracy is an attitude that […]
favicon: https://iapop.com/wp-content/uploads/2018/03/iapop_world_icon-blue-150px-tiny.png favicon: https://iapop.com/wp-content/uploads/2018/03/iapop_world_icon-blue-150px-tiny.png



+ 1
- 1
cache/2024/1fe484434058e9c44d21bfebb0ddba31/index.md View File

url: https://www.la-grange.net/2024/01/23/legacy url: https://www.la-grange.net/2024/01/23/legacy
hash_url: 1fe484434058e9c44d21bfebb0ddba31 hash_url: 1fe484434058e9c44d21bfebb0ddba31
archive_date: 2024-01-28 archive_date: 2024-01-28
og_image:
og_image: https://www.la-grange.net/2024/01/23/3152-ciel.jpg
description: description:
favicon: https://www.la-grange.net/favicon.ico favicon: https://www.la-grange.net/favicon.ico



+ 1
- 1
cache/2024/790f724c45b26de460f9eeac04d48884/index.md View File

url: https://www.la-grange.net/2024/01/22/carnet url: https://www.la-grange.net/2024/01/22/carnet
hash_url: 790f724c45b26de460f9eeac04d48884 hash_url: 790f724c45b26de460f9eeac04d48884
archive_date: 2024-01-28 archive_date: 2024-01-28
og_image:
og_image: https://www.la-grange.net/2024/01/20/3134-carnets.jpg
description: description:
favicon: https://www.la-grange.net/favicon.ico favicon: https://www.la-grange.net/favicon.ico



+ 1
- 1
cache/2024/87c468a4eddabe5d2c28e902d7f17504/index.md View File

url: https://www.la-grange.net/2024/01/11/pourquoi url: https://www.la-grange.net/2024/01/11/pourquoi
hash_url: 87c468a4eddabe5d2c28e902d7f17504 hash_url: 87c468a4eddabe5d2c28e902d7f17504
archive_date: 2024-01-11 archive_date: 2024-01-11
og_image:
og_image: https://www.la-grange.net/2024/01/09/3045-glace.jpg
description: description:
favicon: https://www.la-grange.net/favicon.ico favicon: https://www.la-grange.net/favicon.ico



+ 1
- 1
cache/2024/956819385548bba6e768563b12edc2d6/index.md View File

url: https://www.la-grange.net/2024/01/24/herbe url: https://www.la-grange.net/2024/01/24/herbe
hash_url: 956819385548bba6e768563b12edc2d6 hash_url: 956819385548bba6e768563b12edc2d6
archive_date: 2024-01-28 archive_date: 2024-01-28
og_image:
og_image: https://www.la-grange.net/2024/01/24/3154-herbe.jpg
description: description:
favicon: https://www.la-grange.net/favicon.ico favicon: https://www.la-grange.net/favicon.ico



+ 1
- 1
cache/2024/9bc04d41d25fc73391116d99b7259a3d/index.md View File

url: https://www.la-grange.net/2023/07/10/notes-train url: https://www.la-grange.net/2023/07/10/notes-train
hash_url: 9bc04d41d25fc73391116d99b7259a3d hash_url: 9bc04d41d25fc73391116d99b7259a3d
archive_date: 2024-01-07 archive_date: 2024-01-07
og_image:
og_image: https://www.la-grange.net/2023/07/10/0797-batiment-visage.jpg
description: description:
favicon: https://www.la-grange.net/favicon.ico favicon: https://www.la-grange.net/favicon.ico



+ 1
- 1
cache/2024/b1da1249f2db388d7e84d6ad23c2fc5d/index.md View File

url: https://www.danmcquillan.org/dataluddism.html url: https://www.danmcquillan.org/dataluddism.html
hash_url: b1da1249f2db388d7e84d6ad23c2fc5d hash_url: b1da1249f2db388d7e84d6ad23c2fc5d
archive_date: 2024-01-09 archive_date: 2024-01-09
og_image:
og_image: https://www.danmcquillan.org/images/burnmill.jpg
description: description:
favicon: favicon:



+ 1
- 1
cache/2024/b692faaa55fd2775e957b20e833e9e5e/index.md View File

url: https://www.la-grange.net/2024/01/26/fraichement url: https://www.la-grange.net/2024/01/26/fraichement
hash_url: b692faaa55fd2775e957b20e833e9e5e hash_url: b692faaa55fd2775e957b20e833e9e5e
archive_date: 2024-01-28 archive_date: 2024-01-28
og_image:
og_image: https://www.la-grange.net/2024/01/26/3159-chauffage.jpg
description: description:
favicon: https://www.la-grange.net/favicon.ico favicon: https://www.la-grange.net/favicon.ico



+ 1
- 1
cache/2024/c4751e7c80b292e3533ee6b3e057b702/index.md View File

url: https://www.duchess-france.fr/dossier/women%20in%20tech/alli%C3%A9s/2023/01/15/manuel-survie-femme-tech.html url: https://www.duchess-france.fr/dossier/women%20in%20tech/alli%C3%A9s/2023/01/15/manuel-survie-femme-tech.html
hash_url: c4751e7c80b292e3533ee6b3e057b702 hash_url: c4751e7c80b292e3533ee6b3e057b702
archive_date: 2024-01-21 archive_date: 2024-01-21
og_image:
og_image: https://www.duchess-france.fr/assets/bandeau.jpeg
description: Je vois de plus en plus de femmes rejoindre l’informatique, et c’est une très bonne chose. Je vois aussi trop de femmes patir de sexisme ordinaire, se remettre en question encore et encore… et quitter le milieu au bout de quelques années. J’ai mis du temps à apprendre certaines choses. description: Je vois de plus en plus de femmes rejoindre l’informatique, et c’est une très bonne chose. Je vois aussi trop de femmes patir de sexisme ordinaire, se remettre en question encore et encore… et quitter le milieu au bout de quelques années. J’ai mis du temps à apprendre certaines choses.
favicon: https://www.duchess-france.fr/favicon.ico favicon: https://www.duchess-france.fr/favicon.ico



+ 1
- 1
cache/2024/ce5fdc61fd66cdb9ce548fb543eba986/index.md View File

url: https://blog.glyph.im/2024/01/unsigned-commits.html url: https://blog.glyph.im/2024/01/unsigned-commits.html
hash_url: ce5fdc61fd66cdb9ce548fb543eba986 hash_url: ce5fdc61fd66cdb9ce548fb543eba986
archive_date: 2024-01-25 archive_date: 2024-01-25
og_image:
og_image: https://blog.glyph.im/images/back9.png
description: Deciphering Glyph, the blog of Glyph Lefkowitz. description: Deciphering Glyph, the blog of Glyph Lefkowitz.
favicon: https://blog.glyph.im/images/favicon.ico favicon: https://blog.glyph.im/images/favicon.ico



+ 1
- 1
cache/2024/d236f33cf82727313d17cb23bf36a395/index.md View File

url: https://kagifeedback.org/d/2808-reconsider-your-partnership-with-brave/6 url: https://kagifeedback.org/d/2808-reconsider-your-partnership-with-brave/6
hash_url: d236f33cf82727313d17cb23bf36a395 hash_url: d236f33cf82727313d17cb23bf36a395
archive_date: 2024-01-07 archive_date: 2024-01-07
og_image:
og_image: https://kagifeedback.org/assets/favicon-bmwk4ltf.png
description: Brave, as you know, is led by Brendan Eich. s homophobia is so disgusting that he was forced to resign as the leader... description: Brave, as you know, is led by Brendan Eich. s homophobia is so disgusting that he was forced to resign as the leader...
favicon: https://kagifeedback.org/assets/favicon-bmwk4ltf.png favicon: https://kagifeedback.org/assets/favicon-bmwk4ltf.png



+ 1
- 1
cache/2024/d75afc90a9d3c3b5a56b69446795fbb5/index.md View File

url: https://www.la-grange.net/2024/01/06/ebauche url: https://www.la-grange.net/2024/01/06/ebauche
hash_url: d75afc90a9d3c3b5a56b69446795fbb5 hash_url: d75afc90a9d3c3b5a56b69446795fbb5
archive_date: 2024-01-07 archive_date: 2024-01-07
og_image:
og_image: https://www.la-grange.net/2024/01/06/3008-furikake.jpg
description: description:
favicon: https://www.la-grange.net/favicon.ico favicon: https://www.la-grange.net/favicon.ico



+ 1
- 1
cache/2024/faa1d8cae94da6838ff9351e5df791ca/index.md View File

url: https://gilest.org/indie-easy.html url: https://gilest.org/indie-easy.html
hash_url: faa1d8cae94da6838ff9351e5df791ca hash_url: faa1d8cae94da6838ff9351e5df791ca
archive_date: 2024-01-09 archive_date: 2024-01-09
og_image:
og_image: https://gilest.org/2024/dangerously-muddy.jpg
description: description:
favicon: https://gilest.org/favicon.ico favicon: https://gilest.org/favicon.ico



+ 1
- 1
cache/2024/ff566a58892db07815a327802fea66d3/index.md View File

url: https://tonsky.me/blog/checkbox/ url: https://tonsky.me/blog/checkbox/
hash_url: ff566a58892db07815a327802fea66d3 hash_url: ff566a58892db07815a327802fea66d3
archive_date: 2024-01-28 archive_date: 2024-01-28
og_image:
og_image: https://tonsky.me/blog/checkbox/checkbox@2x.png?t=1706539628
description: History of checkboxes and radio buttons in user interfaces description: History of checkboxes and radio buttons in user interfaces
favicon: https://tonsky.me/i/favicon.png favicon: https://tonsky.me/i/favicon.png



Loading…
Cancel
Save