A place to cache linked articles (think custom and personal wayback machine)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

index.html 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. <!doctype html><!-- This is a valid HTML5 document. -->
  2. <!-- Screen readers, SEO, extensions and so on. -->
  3. <html lang="fr">
  4. <!-- Has to be within the first 1024 bytes, hence before the `title` element
  5. See: https://www.w3.org/TR/2012/CR-html5-20121217/document-metadata.html#charset -->
  6. <meta charset="utf-8">
  7. <!-- Why no `X-UA-Compatible` meta: https://stackoverflow.com/a/6771584 -->
  8. <!-- The viewport meta is quite crowded and we are responsible for that.
  9. See: https://codepen.io/tigt/post/meta-viewport-for-2015 -->
  10. <meta name="viewport" content="width=device-width,initial-scale=1">
  11. <!-- Required to make a valid HTML5 document. -->
  12. <title>Host your own wikipedia backup (archive) — David Larlet</title>
  13. <meta name="description" content="Publication mise en cache pour en conserver une trace.">
  14. <!-- That good ol' feed, subscribe :). -->
  15. <link rel="alternate" type="application/atom+xml" title="Feed" href="/david/log/">
  16. <!-- Generated from https://realfavicongenerator.net/ such a mess. -->
  17. <link rel="apple-touch-icon" sizes="180x180" href="/static/david/icons2/apple-touch-icon.png">
  18. <link rel="icon" type="image/png" sizes="32x32" href="/static/david/icons2/favicon-32x32.png">
  19. <link rel="icon" type="image/png" sizes="16x16" href="/static/david/icons2/favicon-16x16.png">
  20. <link rel="manifest" href="/static/david/icons2/site.webmanifest">
  21. <link rel="mask-icon" href="/static/david/icons2/safari-pinned-tab.svg" color="#07486c">
  22. <link rel="shortcut icon" href="/static/david/icons2/favicon.ico">
  23. <meta name="msapplication-TileColor" content="#f7f7f7">
  24. <meta name="msapplication-config" content="/static/david/icons2/browserconfig.xml">
  25. <meta name="theme-color" content="#f7f7f7" media="(prefers-color-scheme: light)">
  26. <meta name="theme-color" content="#272727" media="(prefers-color-scheme: dark)">
  27. <!-- Documented, feel free to shoot an email. -->
  28. <link rel="stylesheet" href="/static/david/css/style_2021-01-20.css">
  29. <!-- See https://www.zachleat.com/web/comprehensive-webfonts/ for the trade-off. -->
  30. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  31. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  32. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  33. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  34. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  35. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  36. <script>
  /**
   * Force a colour scheme on the root element, or release any forcing.
   *
   * The `forced-dark` class is set on `<html>` only when `themeName` is
   * 'dark', and `forced-light` only when it is 'light'. Any other value
   * removes both classes.
   *
   * @param {string|null} themeName - Requested theme name.
   */
  function toggleTheme(themeName) {
    const forcedClasses = [
      ['forced-dark', 'dark'],
      ['forced-light', 'light'],
    ]
    for (const [className, scheme] of forcedClasses) {
      // The second argument makes `toggle` an explicit add/remove.
      document.documentElement.classList.toggle(className, themeName === scheme)
    }
  }
  // Re-apply the theme persisted by the theme selector form. This script
  // sits before `<body>`, so the forced class is set before content renders.
  const selectedTheme = (() => {
    try {
      // `getItem` returns `null` (not the string 'undefined') when no
      // theme has been stored yet.
      return localStorage.getItem('theme')
    } catch (error) {
      // Deliberate best effort: storage access can throw (e.g. a
      // SecurityError when the browser blocks site data). The page then
      // simply follows the system colour scheme.
      return null
    }
  })()
  // Same guard as in `loadThemeForm`: ignore a missing value and the
  // literal string 'undefined'.
  if (selectedTheme && selectedTheme !== 'undefined') {
    toggleTheme(selectedTheme)
  }
  51. </script>
  52. <meta name="robots" content="noindex, nofollow">
  53. <meta content="origin-when-cross-origin" name="referrer">
  54. <!-- Canonical URL for SEO purposes -->
  55. <link rel="canonical" href="https://dataswamp.org/~solene/2019-11-13-wikimedia-dump.html">
  56. <body class="remarkdown h1-underline h2-underline h3-underline em-underscore hr-center ul-star pre-tick" data-instant-intensity="viewport-all">
  57. <article>
  58. <header>
  59. <h1>Host your own wikipedia backup</h1>
  60. </header>
  61. <nav>
  62. <p class="center">
  63. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  64. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-home"></use>
  65. </svg> Accueil</a> •
  66. <a href="https://dataswamp.org/~solene/2019-11-13-wikimedia-dump.html" title="Lien vers le contenu original">Source originale</a>
  67. </p>
  68. </nav>
  69. <hr>
  70. <h2 id="wikipediaandopenzim">Wikipedia and openzim</h2>
  71. <p>If you ever wanted to host your own wikipedia replica, here is the simplest
  72. way.</p>
  73. <p>As wikipedia is REALLY huge, you don’t really want to host the PHP MediaWiki
  74. software and load the huge database. Instead, the project made the <em>openzim</em>
  75. format to compress the huge database that wikipedia became while still allowing
  76. fast searches in it.</p>
  77. <p>Sadly, on OpenBSD, we have no software reading zim files and most software
  78. requires the library openzim to work which requires extra work to get it as a
  79. package on OpenBSD.</p>
  80. <p>Fortunately, there is a Python package implementing all you need in pure Python
  81. to serve zim files over HTTP, and it’s easy to install.</p>
  82. <p>This tutorial should work on all other Unix-like systems, but package or
  83. binary names may differ.</p>
  84. <h2 id="downloadingwikipedia">Downloading wikipedia</h2>
  85. <p>The Kiwix project is responsible for the wikipedia files; it regularly creates
  86. files from various projects (including stackexchange, gutenberg, wikibooks
  87. etc…) but for this tutorial we want wikipedia:
  88. <a href="https://wiki.kiwix.org/wiki/Content_in_all_languages">https://wiki.kiwix.org/wiki/Content_in_all_languages</a></p>
  89. <p>You will find a lot of files; the language is indicated in the filename. Some
  90. filenames also indicate whether they contain everything or only some categories, and
  91. whether they include pictures or not.</p>
  92. <p>The full French file is 31.4 GB worth.</p>
  93. <h2 id="runningtheserver">Running the server</h2>
  94. <p>For the next steps, I recommend setting up a new user dedicated to this.</p>
  95. <p>On OpenBSD, we will require python3 and pip:</p>
  96. <pre><code>$ doas pkg_add py3-pip--
  97. </code></pre>
  98. <p>Then we can use pip to fetch and install dependencies for the zimply software,
  99. the flag <code>--user</code> is rather important as it allows any user to download and
  100. install python libraries in its home folder instead of polluting the whole
  101. system as root.</p>
  102. <pre><code>$ pip3.7 install --user --upgrade zimply
  103. </code></pre>
  104. <p>I wrote a small script to start the server using the zim file as a parameter, I
  105. rarely write python so the script may not be high standard.</p>
  106. <p>File <strong>server.py</strong>:</p>
  107. <pre><code>from zimply import ZIMServer
  108. import sys
  109. import os.path
  110. if len(sys.argv) == 1:
  111. print("usage: " + sys.argv[0] + " file")
  112. exit(1)
  113. if os.path.exists(sys.argv[1]):
  114. ZIMServer(sys.argv[1])
  115. else:
  116. print("Can't find file " + sys.argv[1])
  117. </code></pre>
  118. <p>And then you can start the server using the command:</p>
  119. <pre><code>$ python3.7 server.py /path/to/wikipedia_fr_all_maxi_2019-08.zim
  120. </code></pre>
  121. <p>You will be able to access wikipedia on the url http://localhost:9454/</p>
  122. <p>Note that this is not a “wiki” as you can’t see history and edit/create pages.</p>
  123. <p>This kind of backup is used in places like Cuba or parts of Africa where people
  124. don’t have unlimited internet access; the project led by Kiwix allows more
  125. people to access knowledge.</p>
  126. </article>
  127. <hr>
  128. <footer>
  129. <p>
  130. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  131. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-home"></use>
  132. </svg> Accueil</a> •
  133. <a href="/david/log/" title="Accès au flux RSS"><svg class="icon icon-rss2">
  134. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-rss2"></use>
  135. </svg> Suivre</a> •
  136. <a href="http://larlet.com" title="Go to my English profile" data-instant><svg class="icon icon-user-tie">
  137. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-user-tie"></use>
  138. </svg> Pro</a> •
  139. <a href="mailto:david%40larlet.fr" title="Envoyer un courriel"><svg class="icon icon-mail">
  140. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-mail"></use>
  141. </svg> Email</a> •
  142. <abbr class="nowrap" title="Hébergeur : Alwaysdata, 62 rue Tiquetonne 75002 Paris, +33184162340"><svg class="icon icon-hammer2">
  143. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-hammer2"></use>
  144. </svg> Légal</abbr>
  145. </p>
  146. <template id="theme-selector">
  147. <form>
  148. <fieldset>
  149. <legend><svg class="icon icon-brightness-contrast">
  150. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-brightness-contrast"></use>
  151. </svg> Thème</legend>
  152. <label>
  153. <input type="radio" value="auto" name="chosen-color-scheme" checked> Auto
  154. </label>
  155. <label>
  156. <input type="radio" value="dark" name="chosen-color-scheme"> Foncé
  157. </label>
  158. <label>
  159. <input type="radio" value="light" name="chosen-color-scheme"> Clair
  160. </label>
  161. </fieldset>
  162. </form>
  163. </template>
  164. </footer>
  165. <script src="/static/david/js/instantpage-5.1.0.min.js" type="module"></script>
  166. <script>
  /**
   * Replace the theme selector `<template>` with the form it contains and
   * wire that form to the persisted theme preference.
   *
   * On every `change` event the chosen value is stored under the `theme`
   * key of `localStorage` and applied through `toggleTheme`. The radio
   * button matching the stored theme, if any, is checked.
   *
   * @param {string} templateName - CSS selector of the `<template>` element.
   */
  function loadThemeForm(templateName) {
    const themeSelectorTemplate = document.querySelector(templateName)
    if (!themeSelectorTemplate) {
      // No template in this page: there is no form to display.
      return
    }
    const form = themeSelectorTemplate.content.firstElementChild
    themeSelectorTemplate.replaceWith(form)
    form.addEventListener('change', (e) => {
      const chosenColorScheme = e.target.value
      try {
        localStorage.setItem('theme', chosenColorScheme)
      } catch (error) {
        // Deliberate best effort: if storage is unavailable or full, the
        // theme is still applied below but will not be remembered.
      }
      toggleTheme(chosenColorScheme)
    })
    let selectedTheme = null
    try {
      selectedTheme = localStorage.getItem('theme')
    } catch (error) {
      // Deliberate best effort: keep the default checked radio button.
    }
    if (!selectedTheme || selectedTheme === 'undefined') {
      return
    }
    // Compare attribute values instead of interpolating the stored string
    // into a selector: an unexpected value (stale, hand-edited, containing
    // quotes) must neither raise a SyntaxError nor dereference `null`.
    const matchingInput = Array.from(form.querySelectorAll('[value]')).find(
      (input) => input.getAttribute('value') === selectedTheme
    )
    if (matchingInput) {
      matchingInput.checked = true
    }
  }
  // Media condition identifying the dark colour scheme rules.
  const prefersColorSchemeDark = '(prefers-color-scheme: dark)'

  // Once the page is loaded, copy every rule nested in a
  // `@media (prefers-color-scheme: dark)` block to the top level of its
  // own stylesheet, then display the theme selector if at least one rule
  // was copied.
  window.addEventListener('load', () => {
    let hasDarkRules = false
    for (const styleSheet of Array.from(document.styleSheets)) {
      let sheetRules
      try {
        sheetRules = styleSheet.cssRules
      } catch (error) {
        // Reading `cssRules` of a cross-origin stylesheet throws a
        // SecurityError: skip that sheet rather than abort the handler
        // (which would prevent the theme form from ever loading).
        continue
      }
      let mediaRules = []
      for (const cssRule of sheetRules) {
        if (cssRule.type !== CSSRule.MEDIA_RULE) {
          continue
        }
        // WARNING: Safari may not expose `conditionText`; fall back on the
        // rule's media list. `cssText` is not usable for this check because
        // it starts with `@media`, never with the condition itself.
        // NOTE(review): assumes `mediaText` is serialized exactly like
        // `prefersColorSchemeDark` — confirm on the targeted browsers.
        const conditionText =
          cssRule.conditionText || (cssRule.media && cssRule.media.mediaText)
        if (conditionText !== prefersColorSchemeDark) {
          continue
        }
        mediaRules = mediaRules.concat(Array.from(cssRule.cssRules))
      }
      // WARNING: do not try to insert a Rule to a styleSheet you are
      // currently iterating on, otherwise the browser will be stuck
      // in an infinite loop…
      for (const mediaRule of mediaRules) {
        // Without an explicit index, `insertRule` inserts at position 0.
        styleSheet.insertRule(mediaRule.cssText)
        hasDarkRules = true
      }
    }
    if (hasDarkRules) {
      loadThemeForm('#theme-selector')
    }
  })
  214. </script>
  215. </body>
  216. </html>