A place to cache linked articles (think custom and personal wayback machine)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

index.html 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. <!doctype html><!-- This is a valid HTML5 document. -->
  2. <!-- Screen readers, SEO, extensions and so on. -->
  3. <html lang="fr">
  4. <!-- Has to be within the first 1024 bytes, hence before the <title>
  5. See: https://www.w3.org/TR/2012/CR-html5-20121217/document-metadata.html#charset -->
  6. <meta charset="utf-8">
  7. <!-- Why no `X-UA-Compatible` meta: https://stackoverflow.com/a/6771584 -->
  8. <!-- The viewport meta is quite crowded and we are responsible for that.
  9. See: https://codepen.io/tigt/post/meta-viewport-for-2015 -->
  10. <meta name="viewport" content="width=device-width,initial-scale=1">
  11. <!-- Required to make a valid HTML5 document. -->
  12. <title>Reverse Engineering Source Code of the Biontech Pfizer Vaccine: Part 2 (archive) — David Larlet</title>
  13. <meta name="description" content="Publication mise en cache pour en conserver une trace.">
  14. <!-- That good ol' feed, subscribe :). -->
  15. <link rel="alternate" type="application/atom+xml" title="Feed" href="/david/log/">
  16. <!-- Generated from https://realfavicongenerator.net/ such a mess. -->
  17. <link rel="apple-touch-icon" sizes="180x180" href="/static/david/icons2/apple-touch-icon.png">
  18. <link rel="icon" type="image/png" sizes="32x32" href="/static/david/icons2/favicon-32x32.png">
  19. <link rel="icon" type="image/png" sizes="16x16" href="/static/david/icons2/favicon-16x16.png">
  20. <link rel="manifest" href="/static/david/icons2/site.webmanifest">
  21. <link rel="mask-icon" href="/static/david/icons2/safari-pinned-tab.svg" color="#07486c">
  22. <link rel="shortcut icon" href="/static/david/icons2/favicon.ico">
  23. <meta name="msapplication-TileColor" content="#f0f0ea">
  24. <meta name="msapplication-config" content="/static/david/icons2/browserconfig.xml">
  25. <meta name="theme-color" content="#f0f0ea">
  26. <!-- Documented, feel free to shoot an email. -->
  27. <link rel="stylesheet" href="/static/david/css/style_2021-01-20.css">
  28. <!-- See https://www.zachleat.com/web/comprehensive-webfonts/ for the trade-off. -->
  29. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  30. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  31. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  32. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  33. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  34. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  35. <script>
  /**
   * Force a colour theme on the root element, or release any forced theme.
   *
   * `forced-dark` ends up set only when `themeName` is 'dark' and
   * `forced-light` only when it is 'light'; any other value clears both.
   *
   * @param {string|null} themeName - Theme identifier chosen by the visitor.
   */
  function toggleTheme(themeName) {
    // Class name -> theme value that switches that class on.
    const forcedClassByTheme = {
      'forced-dark': 'dark',
      'forced-light': 'light',
    }
    for (const [className, theme] of Object.entries(forcedClassByTheme)) {
      document.documentElement.classList.toggle(className, themeName === theme)
    }
  }
  // Re-apply the theme stored under the 'theme' key, if any.
  // `getItem` returns null when the key was never written, so test for a
  // usable value first; the literal string 'undefined' is also rejected in
  // case a bad value was persisted earlier.
  const selectedTheme = localStorage.getItem('theme')
  if (selectedTheme && selectedTheme !== 'undefined') {
    toggleTheme(selectedTheme)
  }
  50. </script>
  51. <meta name="robots" content="noindex, nofollow">
  52. <meta content="origin-when-cross-origin" name="referrer">
  53. <!-- Canonical URL for SEO purposes -->
  54. <link rel="canonical" href="https://berthub.eu/articles/posts/part-2-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/">
  55. <body class="remarkdown h1-underline h2-underline h3-underline em-underscore hr-center ul-star pre-tick">
  56. <article>
  57. <header>
  58. <h1>Reverse Engineering Source Code of the Biontech Pfizer Vaccine: Part 2</h1>
  59. </header>
  60. <nav>
  61. <p class="center">
  62. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  63. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-home"></use>
  64. </svg> Accueil</a> •
  65. <a href="https://berthub.eu/articles/posts/part-2-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/" title="Lien vers le contenu original">Source originale</a>
  66. </p>
  67. </nav>
  68. <hr>
  69. <p>All BNT162b2 vaccine data on this page is sourced from this <a href="https://mednet-communities.net/inn/db/media/docs/11889.doc" target="_blank">World Health
  70. Organization
  71. document</a>.</p>
  72. <blockquote>
  73. <p>This is a living page, shared already so people can get going! But
  74. check back frequently for updates.</p>
  75. </blockquote>
  76. <p><em>Translation</em>:
  77. <a href="https://renaudguerin.net/posts/partie-2-explorons-le-code-source-du-vaccin-biontech-pfizer/" target="_blank">Français</a>
  78. / <a href="https://msakai.github.io/bnt162b2/part-2-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine.ja/" target="_blank">日本語</a></p>
  79. <p>In short: the vaccine mRNA has been optimized by the manufacturer by
  80. changing bits of RNA from (say) <code>UUU</code> to <code>UUC</code>, and people would like to
  81. understand the logic behind these changes. This challenge is quite close to what
  82. cryptologists and reverse engineering people encounter regularly. On this
  83. page, you’ll find all the details you need to get cracking to reverse
  84. engineer just HOW the vaccine has been optimized.</p>
  85. <p>I thought this would just be a fun puzzle, but I have just been informed that
  86. figuring out the optimization procedure &amp; documenting it is tremendously
  87. important for researchers around the world, as this would help them design
  88. code for proteins and vaccines.</p>
  89. <p>So, if you want to help vaccine research, do read on!</p>
  90. <h2 id="the-leader-board">The leader board</h2>
  91. <p>Here are the current best entrants to the optimization algorithm (average of 20 runs):</p>
  92. <h2 id="biontech">BioNTech</h2>
  93. <p>We should all be very grateful that BioNTech has shared this data with us.
  94. And of course we should also be grateful to the many many researchers and
  95. lab workers that worked for decades to bring the state of the art to the
  96. point that such a vaccine could be developed. It is marvelous.</p>
  97. <p>Because it is so marvelous, I want to understand everything about the
  98. vaccine. I wrote a page <a href="https://berthub.eu/articles/posts/reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/" target="_blank">Reverse Engineering the source code of the BioNTech/Pfizer SARS-CoV-2
  99. Vaccine</a>
  100. that describes in some detail what is in the mRNA of the vaccine. It helps
  101. to read this page before continuing, I promise you it will be interesting.</p>
  102. <p>The post left open some questions however, and this is where it gets
  103. fascinating.</p>
  104. <h2 id="the-codon-optimization">The codon optimization</h2>
  105. <p>The vaccine contains RNA code for a very <em>slightly</em> modified copy of the
  106. SARS-CoV-2 S protein.</p>
  107. <p>The RNA code of the vaccine itself however is <em>highly</em> modified from the viral original!
  108. This has been done by the manufacturer, based on their understanding of
  109. nature.</p>
  110. <p>And from what we understand, these modifications make the vaccine <strong>much
  111. much more</strong> effective. It would be a lot of fun to understand these
  112. modifications. It might for example explain why the Moderna vaccine needs
  113. 100 micrograms and the BioNTech vaccine only 30 micrograms.</p>
  114. <p>Here is the beginning of the S protein in both the virus and the BNT162b2
  115. vaccine RNA code. Exclamation marks denote differences.</p>
  116. <pre><code>Virus: AUG UUU GUU UUU CUU GUU UUA UUG CCA CUA GUC UCU AGU CAG UGU GUU
  117. Vaccine: AUG UUC GUG UUC CUG GUG CUG CUG CCU CUG GUG UCC AGC CAG UGU GUG
  118. ! ! ! ! ! ! ! ! ! ! ! ! !
  119. </code></pre>
  120. <p>RNA is a string (literally) of RNA characters, <code>A</code>, <code>C</code>, <code>G</code> and <code>U</code>. There is no
  121. physical framing on there, but it makes sense to analyse it in groups of
  122. three.</p>
  123. <p>Each group (called a codon) maps to an amino acid (denoted by a capital
  124. letter). A string of amino acids is a protein. Here is what that looks
  125. like:</p>
  126. <pre><code>Virus: AUG UUU GUU UUU CUU GUU UUA UUG CCA CUA GUC UCU AGU CAG UGU GUU
  127. M F V F L V L L P L V S S Q C V
  128. Vaccine: AUG UUC GUG UUC CUG GUG CUG CUG CCU CUG GUG UCC AGC CAG UGU GUG
  129. ! ! ! ! ! ! ! ! ! ! ! ! !
  130. </code></pre>
  131. <p>Here we can see that while the codons are different, the amino acid version
  132. is the same. There are 4*4*4 codons but only 20 amino acids. This means you
  133. can typically change every codon into one of two others, and still code for
  134. the same amino acid.</p>
  135. <p>So in the second codon, <code>UUU</code> was changed to <code>UUC</code>. This is a net addition
  136. of one ‘C’ to the vaccine. The third codon changed from <code>GUU</code> to <code>GUG</code>, which is
  137. a net addition of one <code>G</code>.</p>
  138. <p><strong>It is known that a higher fraction of <code>G</code> and <code>C</code> characters improves the
  139. efficiency of an mRNA vaccine</strong>.</p>
  140. <p>Now, if that was all there was to it, this could be the end of this page.
  141. “The algorithm is change codons so we get more G and C in there”. But then
  142. we meet the 9th codon which changes <code>CCA</code> to <code>CCU</code>.</p>
  143. <p>Throughout the ~4000 characters of the vaccine, this happens many times.</p>
  144. <h2 id="our-challenge">Our challenge</h2>
  145. <p>The goal is: find an algorithm that modifies the ‘wild type’ RNA code into
  146. the BNT162b2 one. Because everyone would like to understand how to turn
  147. viral RNA into an effective vaccine. The algorithm does not need to
  148. reproduce the <em>exact</em> RNA code of course, but it would be super nice if it
  149. came up with something very similar, while also being brief.</p>
  150. <p>To help you, I have provided the data in a number of forms, as described on
  151. <a href="https://github.com/berthubert/bnt162b2" target="_blank">the GitHub page</a>.</p>
  152. <blockquote>
  153. <p>Note that in these files the <code>U</code> mentioned above appears as a <code>T</code>. <code>U</code> and
  154. <code>T</code> are the RNA and DNA manifestations of the same information.</p>
  155. </blockquote>
  156. <p>The easiest place to start might be the
  157. ‘<a href="https://github.com/berthubert/bnt162b2/blob/master/side-by-side.csv" target="_blank">side-by-side.csv</a>’
  158. file. This lists the original and modified version of each codon, side by
  159. side:</p>
  160. <pre><code>abspos,codonOrig,codonVaccine
  161. 0,ATG,ATG
  162. 3,TTT,TTC
  163. 6,GTT,GTG
  164. ...
  165. 3813,TAC,TAC
  166. 3816,ACA,ACA
  167. 3819,TAA,TGA
  168. </code></pre>
  169. <p>There is also an equivalency table that shows which codons can be
  170. interchanged without changing the amino acid output. Please find this in
  171. <a href="https://github.com/berthubert/bnt162b2/blob/master/codon-table-grouped.csv" target="_blank">codon-table-grouped.csv</a>.
  172. There is also a visual version
  173. <a href="https://en.wikipedia.org/wiki/DNA_and_RNA_codon_tables#Standard_DNA_codon_table" target="_blank">here</a>.</p>
  174. <h2 id="a-sample-algorithm">A sample algorithm</h2>
  175. <p>On the <a href="https://github.com/berthubert/bnt162b2" target="_blank">GitHub repository</a> you can
  176. find
  177. <a href="https://github.com/berthubert/bnt162b2/blob/master/3rd-gc.go" target="_blank">3rd-gc.go</a>
  178. (and
  179. <a href="https://github.com/berthubert/bnt162b2/blob/master/3rd-gc.py" target="_blank">3rd-gc.py</a>).</p>
  180. <p>These implement a simple strategy that works like this:</p>
  181. <ul>
  182. <li>If a virus codon already ended on G or C, copy it to the vaccine mRNA</li>
  183. <li>If not, replace last nucleotide in codon by a G, see if the amino acid
  184. still matches, if so, copy to the vaccine mRNA</li>
  185. <li>Try the same with a C</li>
  186. <li>Otherwise copy as is</li>
  187. </ul>
  188. <p>Or in <code>golang</code>:</p>
  189. <pre><code>// base case, don't do anything
  190. our = vir
  191. // don't do anything if codon ends on G or C already
  192. if(vir[2] == 'G' || vir[2] =='C') {
  193. fmt.Printf("Codon ended on G or C already, not doing anything.")
  194. } else {
  195. prop = vir[:2]+"G"
  196. fmt.Printf("Attempting G substitution, new candidate '%s'. ", prop)
  197. if(c2s[vir] == c2s[prop]) {
  198. fmt.Printf("Amino acid still the same, done!")
  199. our = prop
  200. } else {
  201. fmt.Printf("Oops, amino acid changed. Trying C, new candidate '%s'. ", prop)
  202. prop = vir[:2]+"C"
  203. if(c2s[vir] == c2s[prop]) {
  204. fmt.Printf("Amino acid still the same, done!")
  205. our=prop
  206. }
  207. }
  208. }
  209. </code></pre>
  210. <p>This achieves a rather poor 53.1% match with the BioNTech RNA vaccine, but
  211. it is a start.</p>
  212. <p>When you design your algorithm, be sure to only base your choices on the
  213. virus RNA. Do not peek into the BioNTech RNA!</p>
  214. <p>If you have achieved a score beyond 53.1% please email a link to your code
  215. to bert@hubertnet.nl (or <a href="https://twitter.com/PowerDNS_Bert" target="_blank">@PowerDNS_Bert</a>)
  216. and I’ll put it on the leader board at the top of this page!</p>
  217. <h2 id="things-that-will-help">Things that will help</h2>
  218. <p>As with every form of reverse engineering or cryptanalysis, it helps to
  219. understand what we are looking at.</p>
  220. <h2 id="gc-ratio">GC ratio</h2>
  221. <p>We know that one goal of the ‘codon optimization’ is to get more <code>C</code>s and
  222. <code>G</code>s into the vaccine version of the RNA. However, there is also a limit to
  223. that. In DNA, which is also used to manufacture the vaccine, <code>G</code> and <code>C</code>
  224. bind together strongly, to the point that if you put too many of these
  225. ‘nucleotides’ in there, the DNA will no longer be replicated efficiently.</p>
  226. <p>So some modifications may actually happen to manage <em>down</em> the GC percentage of a
  227. stretch of DNA if it was getting too high.</p>
  228. <p>I <a href="https://twitter.com/PowerDNS_Bert/status/1344036143961169920" target="_blank">tweeted about this</a> earlier.</p>
  229. <h2 id="codon-optimization">Codon optimization</h2>
  230. <p>Some codons are rare in human DNA, or in certain cells. It may be that some
  231. codons are replaced by other ones simply because they are more frequently
  232. used by some cells.</p>
  233. <p>I <a href="https://twitter.com/PowerDNS_Bert/status/1344400081802448897" target="_blank">tweeted about this</a>
  234. earlier.</p>
  235. <h2 id="rna-folding">RNA folding</h2>
  236. <p>We’ve been looking at codons up to here. The RNA itself however does not
  237. know about codons, there are no markers that say where a codon begins and
  238. ends. The first codon on a protein however is always ATG (or AUG in RNA).</p>
  239. <p>RNA curls up into a shape. This shape might help evade the immune system or
  240. it might improve translation into amino acids. This only depends on the
  241. sequence of RNA nucleotides and not on specific codons.</p>
  242. <p>You can submit RNA sequences to <a href="http://rna.tbi.univie.ac.at/cgi-bin/RNAWebSuite/RNAfold.cgi" target="_blank">this server of the Institute for
  243. Theoretical Chemistry at the University of
  244. Vienna</a> and it
  245. will fold RNA for you. This is a very advanced server that does meticulous
  246. calculations.</p>
  247. <p>This <a href="https://en.wikipedia.org/wiki/Nucleic_acid_structure_prediction" target="_blank">Wikipedia
  248. page</a>
  249. describes how this works.</p>
  250. <p>It may be that some optimizations improve folding.</p>
  251. <p>I am also told that this paper by Moderna (another mRNA vaccine
  252. manufacturer) may be relevant:
  253. <a href="https://www.pnas.org/content/116/48/24075" target="_blank">mRNA structure regulates protein expression through changes in functional
  254. half-life</a>.</p>
  255. </article>
  256. <hr>
  257. <footer>
  258. <p>
  259. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  260. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-home"></use>
  261. </svg> Accueil</a> •
  262. <a href="/david/log/" title="Accès au flux RSS"><svg class="icon icon-rss2">
  263. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-rss2"></use>
  264. </svg> RSS</a> •
  265. <a href="http://larlet.com" title="Go to my English profile" data-instant><svg class="icon icon-user-tie">
  266. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-user-tie"></use>
  267. </svg> Pro</a> •
  268. <a href="mailto:david%40larlet.fr" title="Envoyer un courriel"><svg class="icon icon-mail">
  269. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-mail"></use>
  270. </svg> Email</a> •
  271. <abbr class="nowrap" title="Hébergeur : Alwaysdata, 62 rue Tiquetonne 75002 Paris, +33184162340"><svg class="icon icon-hammer2">
  272. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-hammer2"></use>
  273. </svg> Légal</abbr>
  274. </p>
  275. <template id="theme-selector">
  276. <form>
  277. <fieldset>
  278. <legend><svg class="icon icon-brightness-contrast">
  279. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-brightness-contrast"></use>
  280. </svg> Thème</legend>
  281. <label>
  282. <input type="radio" value="auto" name="chosen-color-scheme" checked> Auto
  283. </label>
  284. <label>
  285. <input type="radio" value="dark" name="chosen-color-scheme"> Foncé
  286. </label>
  287. <label>
  288. <input type="radio" value="light" name="chosen-color-scheme"> Clair
  289. </label>
  290. </fieldset>
  291. </form>
  292. </template>
  293. </footer>
  294. <script>
  /**
   * Move the theme selector form out of its <template> into the page and
   * wire it to the stored theme preference.
   *
   * On every `change` event the chosen value is saved under the 'theme'
   * localStorage key and applied through `toggleTheme`. If a theme is
   * already stored, the matching control is pre-checked.
   *
   * @param {string} templateName - CSS selector of the <template> whose
   *   first element child is the form.
   */
  function loadThemeForm(templateName) {
    const themeSelectorTemplate = document.querySelector(templateName)
    const form = themeSelectorTemplate.content.firstElementChild
    // Swapping the template for its own child makes the form part of the page.
    themeSelectorTemplate.replaceWith(form)
    form.addEventListener('change', (e) => {
      const chosenColorScheme = e.target.value
      localStorage.setItem('theme', chosenColorScheme)
      toggleTheme(chosenColorScheme)
    })
    const selectedTheme = localStorage.getItem('theme')
    if (selectedTheme && selectedTheme !== 'undefined') {
      // Compare values in JS instead of interpolating the stored string into
      // a selector: an unexpected value can neither break the selector syntax
      // nor lead to a null dereference when no control matches.
      const storedChoice = Array.from(form.querySelectorAll('[value]')).find(
        (control) => control.getAttribute('value') === selectedTheme
      )
      if (storedChoice) {
        storedChoice.checked = true
      }
    }
  }
  // Media condition whose nested rules are copied to the top level of their
  // style sheet once the page has loaded.
  const prefersColorSchemeDark = '(prefers-color-scheme: dark)'
  window.addEventListener('load', () => {
    let hasDarkRules = false
    for (const styleSheet of Array.from(document.styleSheets)) {
      let sheetRules
      try {
        sheetRules = styleSheet.cssRules
      } catch (error) {
        // Reading the rules of a cross-origin style sheet raises a
        // SecurityError; skip that sheet instead of aborting the handler.
        if (error.name !== 'SecurityError') {
          throw error
        }
        continue
      }
      let mediaRules = []
      for (const cssRule of sheetRules) {
        if (cssRule.type !== CSSRule.MEDIA_RULE) {
          continue
        }
        // WARNING: Safari may not support `conditionText`; the rule's media
        // list carries the same condition, so both paths use one comparison.
        const condition = cssRule.conditionText || cssRule.media.mediaText
        if (condition !== prefersColorSchemeDark) {
          continue
        }
        mediaRules = mediaRules.concat(Array.from(cssRule.cssRules))
      }
      // WARNING: do not try to insert a rule into a style sheet you are
      // currently iterating on, otherwise the browser will be stuck
      // in an infinite loop…
      for (const mediaRule of mediaRules) {
        styleSheet.insertRule(mediaRule.cssText)
        hasDarkRules = true
      }
    }
    // Only offer the theme selector when at least one dark rule was copied.
    if (hasDarkRules) {
      loadThemeForm('#theme-selector')
    }
  })
  342. </script>
  343. </body>
  344. </html>