"""Fetch additional metadata.""" | """Fetch additional metadata.""" | ||||
parsed_url = urlparse(url) | parsed_url = urlparse(url) | ||||
root_url = f"{parsed_url.scheme}://{parsed_url.netloc}/" | root_url = f"{parsed_url.scheme}://{parsed_url.netloc}/" | ||||
data = lassie.fetch(url) | |||||
data = lassie.fetch(url, all_images=True) | |||||
og_image = "" | og_image = "" | ||||
favicon = "" | favicon = "" | ||||
for image in data.get("images"): | for image in data.get("images"): | ||||
elif ".ico" in favicon and ".ico" not in image_src: | elif ".ico" in favicon and ".ico" not in image_src: | ||||
favicon = image_src | favicon = image_src | ||||
if not og_image: | |||||
for image in data.get("images"): | |||||
if image_type == "body_image": | |||||
image_src = image.get("src") | |||||
if "favicon" not in image_src: | |||||
og_image = image_src | |||||
break | |||||
# Fallback on server's default. | # Fallback on server's default. | ||||
if not favicon: | if not favicon: | ||||
favico_url = f"{root_url}favicon.ico" | favico_url = f"{root_url}favicon.ico" | ||||
def metadata(): | def metadata(): | ||||
"""Fetch additional metadata for existing archives.""" | """Fetch additional metadata for existing archives.""" | ||||
for cache in Cache.all(): | for cache in Cache.all(): | ||||
# That one is takin way too long. | |||||
# That one is taking way too long. | |||||
if cache.url.startswith("https://tw5.immateriel.fr"): | if cache.url.startswith("https://tw5.immateriel.fr"): | ||||
print("Skipping", cache.url) | |||||
print("Skipping (too long)", cache.url) | |||||
continue | |||||
if cache.og_image and cache.description and cache.favicon: | |||||
print("Skipping (all good)", cache.url) | |||||
continue | |||||
if cache.url.startswith( | |||||
( | |||||
"https://www.la-grange.net", | |||||
"https://gilest.org", | |||||
"https://vasilis.nl", | |||||
"https://www.danmcquillan.org", | |||||
"https://public-inbox.org", | |||||
) | |||||
) and (cache.og_image or cache.description or cache.favicon): | |||||
print("Skipping (known missing infos)", cache.url) | |||||
continue | continue | ||||
# if cache.description or cache.url.startswith( | |||||
# ( | |||||
# "https://www.la-grange.net", | |||||
# "https://gilest.org", | |||||
# "https://vasilis.nl", | |||||
# "https://www.danmcquillan.org", | |||||
# "https://public-inbox.org", | |||||
# ) | |||||
# ): | |||||
# print("Skipping", cache.url) | |||||
# continue | |||||
print("Fetching metadata for", cache.url, cache.title) | print("Fetching metadata for", cache.url, cache.title) | ||||
og_image, description, favicon = fetch_metadata( | og_image, description, favicon = fetch_metadata( | ||||
cache.title, cache.url, cache.description | cache.title, cache.url, cache.description |
url: https://iapop.com/deep-democracy/ | url: https://iapop.com/deep-democracy/ | ||||
hash_url: 1d60fc5548a6fe61da80a4e16892fa0c | hash_url: 1d60fc5548a6fe61da80a4e16892fa0c | ||||
archive_date: 2024-01-31 | archive_date: 2024-01-31 | ||||
og_image: | |||||
og_image: https://iapop.com/wp-content/uploads/2018/03/iapop_world_icon-blue-150px-tiny.png | |||||
description: Definition of Deep Democracy The concept of Deep Democracy was developed by Arnold Mindell. It is defined as an attitude and a principle. Attitude: Deep Democracy is an attitude that […] | description: Definition of Deep Democracy The concept of Deep Democracy was developed by Arnold Mindell. It is defined as an attitude and a principle. Attitude: Deep Democracy is an attitude that […] | ||||
favicon: https://iapop.com/wp-content/uploads/2018/03/iapop_world_icon-blue-150px-tiny.png | favicon: https://iapop.com/wp-content/uploads/2018/03/iapop_world_icon-blue-150px-tiny.png | ||||
url: https://www.la-grange.net/2024/01/23/legacy | url: https://www.la-grange.net/2024/01/23/legacy | ||||
hash_url: 1fe484434058e9c44d21bfebb0ddba31 | hash_url: 1fe484434058e9c44d21bfebb0ddba31 | ||||
archive_date: 2024-01-28 | archive_date: 2024-01-28 | ||||
og_image: | |||||
og_image: https://www.la-grange.net/2024/01/23/3152-ciel.jpg | |||||
description: | description: | ||||
favicon: https://www.la-grange.net/favicon.ico | favicon: https://www.la-grange.net/favicon.ico | ||||
url: https://www.la-grange.net/2024/01/22/carnet | url: https://www.la-grange.net/2024/01/22/carnet | ||||
hash_url: 790f724c45b26de460f9eeac04d48884 | hash_url: 790f724c45b26de460f9eeac04d48884 | ||||
archive_date: 2024-01-28 | archive_date: 2024-01-28 | ||||
og_image: | |||||
og_image: https://www.la-grange.net/2024/01/20/3134-carnets.jpg | |||||
description: | description: | ||||
favicon: https://www.la-grange.net/favicon.ico | favicon: https://www.la-grange.net/favicon.ico | ||||
url: https://www.la-grange.net/2024/01/11/pourquoi | url: https://www.la-grange.net/2024/01/11/pourquoi | ||||
hash_url: 87c468a4eddabe5d2c28e902d7f17504 | hash_url: 87c468a4eddabe5d2c28e902d7f17504 | ||||
archive_date: 2024-01-11 | archive_date: 2024-01-11 | ||||
og_image: | |||||
og_image: https://www.la-grange.net/2024/01/09/3045-glace.jpg | |||||
description: | description: | ||||
favicon: https://www.la-grange.net/favicon.ico | favicon: https://www.la-grange.net/favicon.ico | ||||
url: https://www.la-grange.net/2024/01/24/herbe | url: https://www.la-grange.net/2024/01/24/herbe | ||||
hash_url: 956819385548bba6e768563b12edc2d6 | hash_url: 956819385548bba6e768563b12edc2d6 | ||||
archive_date: 2024-01-28 | archive_date: 2024-01-28 | ||||
og_image: | |||||
og_image: https://www.la-grange.net/2024/01/24/3154-herbe.jpg | |||||
description: | description: | ||||
favicon: https://www.la-grange.net/favicon.ico | favicon: https://www.la-grange.net/favicon.ico | ||||
url: https://www.la-grange.net/2023/07/10/notes-train | url: https://www.la-grange.net/2023/07/10/notes-train | ||||
hash_url: 9bc04d41d25fc73391116d99b7259a3d | hash_url: 9bc04d41d25fc73391116d99b7259a3d | ||||
archive_date: 2024-01-07 | archive_date: 2024-01-07 | ||||
og_image: | |||||
og_image: https://www.la-grange.net/2023/07/10/0797-batiment-visage.jpg | |||||
description: | description: | ||||
favicon: https://www.la-grange.net/favicon.ico | favicon: https://www.la-grange.net/favicon.ico | ||||
url: https://www.danmcquillan.org/dataluddism.html | url: https://www.danmcquillan.org/dataluddism.html | ||||
hash_url: b1da1249f2db388d7e84d6ad23c2fc5d | hash_url: b1da1249f2db388d7e84d6ad23c2fc5d | ||||
archive_date: 2024-01-09 | archive_date: 2024-01-09 | ||||
og_image: | |||||
og_image: https://www.danmcquillan.org/images/burnmill.jpg | |||||
description: | description: | ||||
favicon: | favicon: | ||||
url: https://www.la-grange.net/2024/01/26/fraichement | url: https://www.la-grange.net/2024/01/26/fraichement | ||||
hash_url: b692faaa55fd2775e957b20e833e9e5e | hash_url: b692faaa55fd2775e957b20e833e9e5e | ||||
archive_date: 2024-01-28 | archive_date: 2024-01-28 | ||||
og_image: | |||||
og_image: https://www.la-grange.net/2024/01/26/3159-chauffage.jpg | |||||
description: | description: | ||||
favicon: https://www.la-grange.net/favicon.ico | favicon: https://www.la-grange.net/favicon.ico | ||||
url: https://www.duchess-france.fr/dossier/women%20in%20tech/alli%C3%A9s/2023/01/15/manuel-survie-femme-tech.html | url: https://www.duchess-france.fr/dossier/women%20in%20tech/alli%C3%A9s/2023/01/15/manuel-survie-femme-tech.html | ||||
hash_url: c4751e7c80b292e3533ee6b3e057b702 | hash_url: c4751e7c80b292e3533ee6b3e057b702 | ||||
archive_date: 2024-01-21 | archive_date: 2024-01-21 | ||||
og_image: | |||||
og_image: https://www.duchess-france.fr/assets/bandeau.jpeg | |||||
description: Je vois de plus en plus de femmes rejoindre l’informatique, et c’est une très bonne chose. Je vois aussi trop de femmes patir de sexisme ordinaire, se remettre en question encore et encore… et quitter le milieu au bout de quelques années. J’ai mis du temps à apprendre certaines choses. | description: Je vois de plus en plus de femmes rejoindre l’informatique, et c’est une très bonne chose. Je vois aussi trop de femmes patir de sexisme ordinaire, se remettre en question encore et encore… et quitter le milieu au bout de quelques années. J’ai mis du temps à apprendre certaines choses. | ||||
favicon: https://www.duchess-france.fr/favicon.ico | favicon: https://www.duchess-france.fr/favicon.ico | ||||
url: https://blog.glyph.im/2024/01/unsigned-commits.html | url: https://blog.glyph.im/2024/01/unsigned-commits.html | ||||
hash_url: ce5fdc61fd66cdb9ce548fb543eba986 | hash_url: ce5fdc61fd66cdb9ce548fb543eba986 | ||||
archive_date: 2024-01-25 | archive_date: 2024-01-25 | ||||
og_image: | |||||
og_image: https://blog.glyph.im/images/back9.png | |||||
description: Deciphering Glyph, the blog of Glyph Lefkowitz. | description: Deciphering Glyph, the blog of Glyph Lefkowitz. | ||||
favicon: https://blog.glyph.im/images/favicon.ico | favicon: https://blog.glyph.im/images/favicon.ico | ||||
url: https://kagifeedback.org/d/2808-reconsider-your-partnership-with-brave/6 | url: https://kagifeedback.org/d/2808-reconsider-your-partnership-with-brave/6 | ||||
hash_url: d236f33cf82727313d17cb23bf36a395 | hash_url: d236f33cf82727313d17cb23bf36a395 | ||||
archive_date: 2024-01-07 | archive_date: 2024-01-07 | ||||
og_image: | |||||
og_image: https://kagifeedback.org/assets/favicon-bmwk4ltf.png | |||||
description: Brave, as you know, is led by Brendan Eich. s homophobia is so disgusting that he was forced to resign as the leader... | description: Brave, as you know, is led by Brendan Eich. s homophobia is so disgusting that he was forced to resign as the leader... | ||||
favicon: https://kagifeedback.org/assets/favicon-bmwk4ltf.png | favicon: https://kagifeedback.org/assets/favicon-bmwk4ltf.png | ||||
url: https://www.la-grange.net/2024/01/06/ebauche | url: https://www.la-grange.net/2024/01/06/ebauche | ||||
hash_url: d75afc90a9d3c3b5a56b69446795fbb5 | hash_url: d75afc90a9d3c3b5a56b69446795fbb5 | ||||
archive_date: 2024-01-07 | archive_date: 2024-01-07 | ||||
og_image: | |||||
og_image: https://www.la-grange.net/2024/01/06/3008-furikake.jpg | |||||
description: | description: | ||||
favicon: https://www.la-grange.net/favicon.ico | favicon: https://www.la-grange.net/favicon.ico | ||||
url: https://gilest.org/indie-easy.html | url: https://gilest.org/indie-easy.html | ||||
hash_url: faa1d8cae94da6838ff9351e5df791ca | hash_url: faa1d8cae94da6838ff9351e5df791ca | ||||
archive_date: 2024-01-09 | archive_date: 2024-01-09 | ||||
og_image: | |||||
og_image: https://gilest.org/2024/dangerously-muddy.jpg | |||||
description: | description: | ||||
favicon: https://gilest.org/favicon.ico | favicon: https://gilest.org/favicon.ico | ||||
url: https://tonsky.me/blog/checkbox/ | url: https://tonsky.me/blog/checkbox/ | ||||
hash_url: ff566a58892db07815a327802fea66d3 | hash_url: ff566a58892db07815a327802fea66d3 | ||||
archive_date: 2024-01-28 | archive_date: 2024-01-28 | ||||
og_image: | |||||
og_image: https://tonsky.me/blog/checkbox/checkbox@2x.png?t=1706539628 | |||||
description: History of checkboxes and radio buttons in user interfaces | description: History of checkboxes and radio buttons in user interfaces | ||||
favicon: https://tonsky.me/i/favicon.png | favicon: https://tonsky.me/i/favicon.png | ||||