A place to cache linked articles (think custom and personal wayback machine)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. <!doctype html><!-- This is a valid HTML5 document. -->
  2. <!-- Screen readers, SEO, extensions and so on. -->
  3. <html lang="fr">
  4. <!-- Has to be within the first 1024 bytes, hence before the `title` element
  5. See: https://www.w3.org/TR/2012/CR-html5-20121217/document-metadata.html#charset -->
  6. <meta charset="utf-8">
  7. <!-- Why no `X-UA-Compatible` meta: https://stackoverflow.com/a/6771584 -->
  8. <!-- The viewport meta is quite crowded and we are responsible for that.
  9. See: https://codepen.io/tigt/post/meta-viewport-for-2015 -->
  10. <meta name="viewport" content="width=device-width,initial-scale=1">
  11. <!-- Required to make a valid HTML5 document. -->
  12. <title>Reverse Engineering Source Code of the Biontech Pfizer Vaccine: Part 2 (archive) — David Larlet</title>
  13. <meta name="description" content="Publication mise en cache pour en conserver une trace.">
  14. <!-- That good ol' feed, subscribe :). -->
  15. <link rel="alternate" type="application/atom+xml" title="Feed" href="/david/log/">
  16. <!-- Generated from https://realfavicongenerator.net/ such a mess. -->
  17. <link rel="apple-touch-icon" sizes="180x180" href="/static/david/icons2/apple-touch-icon.png">
  18. <link rel="icon" type="image/png" sizes="32x32" href="/static/david/icons2/favicon-32x32.png">
  19. <link rel="icon" type="image/png" sizes="16x16" href="/static/david/icons2/favicon-16x16.png">
  20. <link rel="manifest" href="/static/david/icons2/site.webmanifest">
  21. <link rel="mask-icon" href="/static/david/icons2/safari-pinned-tab.svg" color="#07486c">
  22. <link rel="shortcut icon" href="/static/david/icons2/favicon.ico">
  23. <meta name="msapplication-TileColor" content="#f7f7f7">
  24. <meta name="msapplication-config" content="/static/david/icons2/browserconfig.xml">
  25. <meta name="theme-color" content="#f7f7f7" media="(prefers-color-scheme: light)">
  26. <meta name="theme-color" content="#272727" media="(prefers-color-scheme: dark)">
  27. <!-- Documented, feel free to shoot an email. -->
  28. <link rel="stylesheet" href="/static/david/css/style_2021-01-20.css">
  29. <!-- See https://www.zachleat.com/web/comprehensive-webfonts/ for the trade-off. -->
  30. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  31. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  32. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  33. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  34. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  35. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  36. <script>
  /**
   * Force (or stop forcing) a colour scheme on the root element.
   *
   * Sets the `forced-dark` class on `<html>` when `themeName` is `'dark'`
   * and the `forced-light` class when it is `'light'`; each class is
   * removed otherwise, so any other value clears both.
   *
   * @param {string} themeName - The scheme to force.
   */
  function toggleTheme(themeName) {
    const { classList } = document.documentElement
    for (const scheme of ['dark', 'light']) {
      // The second argument forces the class on/off instead of flipping it.
      classList.toggle(`forced-${scheme}`, themeName === scheme)
    }
  }
  // Re-apply the theme saved under the `theme` storage key as early as
  // possible (this script sits in the document head).
  const selectedTheme = (() => {
    try {
      return localStorage.getItem('theme')
    } catch (error) {
      // Reading `localStorage` can throw (e.g. a SecurityError when the
      // browser is configured to deny storage); fall back to "nothing saved".
      return null
    }
  })()
  // `getItem` yields `null` for a missing key; the literal string
  // 'undefined' is also treated as "nothing saved".
  if (selectedTheme !== null && selectedTheme !== 'undefined') {
    toggleTheme(selectedTheme)
  }
  51. </script>
  52. <meta name="robots" content="noindex, nofollow">
  53. <meta content="origin-when-cross-origin" name="referrer">
  54. <!-- Canonical URL for SEO purposes -->
  55. <link rel="canonical" href="https://berthub.eu/articles/posts/part-2-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/">
  56. <body class="remarkdown h1-underline h2-underline h3-underline em-underscore hr-center ul-star pre-tick" data-instant-intensity="viewport-all">
  57. <article>
  58. <header>
  59. <h1>Reverse Engineering Source Code of the Biontech Pfizer Vaccine: Part 2</h1>
  60. </header>
  61. <nav>
  62. <p class="center">
  63. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  64. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-home"></use>
  65. </svg> Accueil</a> •
  66. <a href="https://berthub.eu/articles/posts/part-2-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/" title="Lien vers le contenu original">Source originale</a>
  67. </p>
  68. </nav>
  69. <hr>
  70. <p>All BNT162b2 vaccine data on this page is sourced from this <a href="https://mednet-communities.net/inn/db/media/docs/11889.doc" target="_blank">World Health
  71. Organization
  72. document</a>.</p>
  73. <blockquote>
  74. <p>This is a living page, shared already so people can get going! But
  75. check back frequently for updates.</p>
  76. </blockquote>
  77. <p><em>Translation</em>:
  78. <a href="https://renaudguerin.net/posts/partie-2-explorons-le-code-source-du-vaccin-biontech-pfizer/" target="_blank">Français</a>
  79. / <a href="https://msakai.github.io/bnt162b2/part-2-reverse-engineering-source-code-of-the-biontech-pfizer-vaccine.ja/" target="_blank">日本語</a></p>
  80. <p>In short: the vaccine mRNA has been optimized by the manufacturer by
  81. changing bits of RNA from (say) <code>UUU</code> to <code>UUC</code>, and people would like to
  82. understand the logic behind these changes. This challenge is quite close to what
  83. cryptologists and reverse engineering people encounter regularly. On this
  84. page, you’ll find all the details you need to get cracking to reverse
  85. engineer just HOW the vaccine has been optimized.</p>
  86. <p>I thought this would just be a fun puzzle, but I have just been informed that
  87. figuring out the optimization procedure &amp; documenting it is tremendously
  88. important for researchers around the world, as this would help them design
  89. code for proteins and vaccines.</p>
  90. <p>So, if you want to help vaccine research, do read on!</p>
  91. <h2 id="the-leader-board">The leader board</h2>
  92. <p>Here are the current best entrants to the optimization algorithm (average of 20 runs):</p>
  93. <h2 id="biontech">BioNTech</h2>
  94. <p>We should all be very grateful that BioNTech has shared this data with us.
  95. And of course we should also be grateful to the many many researchers and
  96. lab workers that worked for decades to bring the state of the art to the
  97. point that such a vaccine could be developed. It is marvelous.</p>
  98. <p>Because it is so marvelous, I want to understand everything about the
  99. vaccine. I wrote a page <a href="https://berthub.eu/articles/posts/reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/" target="_blank">Reverse Engineering the source code of the BioNTech/Pfizer SARS-CoV-2
  100. Vaccine</a>
  101. that describes in some detail what is in the mRNA of the vaccine. It helps
  102. to read this page before continuing, I promise you it will be interesting.</p>
  103. <p>The post left open some questions however, and this is where it gets
  104. fascinating.</p>
  105. <h2 id="the-codon-optimization">The codon optimization</h2>
  106. <p>The vaccine contains RNA code for a very <em>slightly</em> modified copy of the
  107. SARS-CoV-2 S protein.</p>
  108. <p>The RNA code of the vaccine itself however is <em>highly</em> modified from the viral original!
  109. This has been done by the manufacturer, based on their understanding of
  110. nature.</p>
  111. <p>And from what we understand, these modifications make the vaccine <strong>much
  112. much more</strong> effective. It would be a lot of fun to understand these
  113. modifications. It might for example explain why the Moderna vaccine needs
  114. 100 micrograms and the BioNTech vaccine only 30 micrograms.</p>
  115. <p>Here is the beginning of the S protein in both the virus and the BNT162b2
  116. vaccine RNA code. Exclamation marks denote differences.</p>
  117. <pre><code>Virus:   AUG UUU GUU UUU CUU GUU UUA UUG CCA CUA GUC UCU AGU CAG UGU GUU
  118. Vaccine: AUG UUC GUG UUC CUG GUG CUG CUG CCU CUG GUG UCC AGC CAG UGU GUG
  119.                !   !   !   !   !   ! !     !   !   !   !   !           !
  120. </code></pre>
  121. <p>RNA is a string (literally) of RNA characters, <code>A</code>, <code>C</code>, <code>G</code> and <code>U</code>. There is no
  122. physical framing on there, but it makes sense to analyse it in groups of
  123. three.</p>
  124. <p>Each group (called a codon) maps to an amino acid (denoted by a capital
  125. letter). A string of amino acids is a protein. Here is what that looks
  126. like:</p>
  127. <pre><code>Virus:   AUG UUU GUU UUU CUU GUU UUA UUG CCA CUA GUC UCU AGU CAG UGU GUU
  128.           M   F   V   F   L   V   L   L   P   L   V   S   S   Q   C   V
  129. Vaccine: AUG UUC GUG UUC CUG GUG CUG CUG CCU CUG GUG UCC AGC CAG UGU GUG
  130.                !   !   !   !   !   ! !     !   !   !   !   !           !
  131. </code></pre>
  132. <p>Here we can see that while the codons are different, the amino acid version
  133. is the same. There are 4*4*4 codons but only 20 amino acids. This means you
  134. can typically change every codon into one of two others, and still code for
  135. the same amino acid.</p>
  136. <p>So in the second codon, <code>UUU</code> was changed to <code>UUC</code>. This is a net addition
  137. of one ‘C’ to the vaccine. The third codon changed from <code>GUU</code> to <code>GUG</code>, which is
  138. a net addition of one <code>G</code>.</p>
  139. <p><strong>It is known that a higher fraction of <code>G</code> and <code>C</code> characters improves the
  140. efficiency of an mRNA vaccine</strong>.</p>
  141. <p>Now, if that was all there was to it, this could be the end of this page.
  142. “The algorithm is change codons so we get more G and C in there”. But then
  143. we meet the 9th codon which changes <code>CCA</code> to <code>CCU</code>.</p>
  144. <p>Throughout the ~4000 characters of the vaccine, this happens many times.</p>
  145. <h2 id="our-challenge">Our challenge</h2>
  146. <p>The goal is: find an algorithm that modifies the ‘wild type’ RNA code into
  147. the BNT162b2 one. Because everyone would like to understand how to turn
  148. viral RNA into an effective vaccine. The algorithm does not need to
  149. reproduce the <em>exact</em> RNA code of course, but it would be super nice if it
  150. came up with something very similar, while also being brief.</p>
  151. <p>To help you, I have provided the data in a number of forms, as described on
  152. <a href="https://github.com/berthubert/bnt162b2" target="_blank">the GitHub page</a>.</p>
  153. <blockquote>
  154. <p>Note that in these files the <code>U</code> mentioned above appears as a <code>T</code>. <code>U</code> and
  155. <code>T</code> are the RNA and DNA manifestations of the same information.</p>
  156. </blockquote>
  157. <p>The easiest place to start might be the
  158. ‘<a href="https://github.com/berthubert/bnt162b2/blob/master/side-by-side.csv" target="_blank">side-by-side.csv</a>’
  159. file. This lists the original and modified version of each codon, side by
  160. side:</p>
  161. <pre><code>abspos,codonOrig,codonVaccine
  162. 0,ATG,ATG
  163. 3,TTT,TTC
  164. 6,GTT,GTG
  165. ...
  166. 3813,TAC,TAC
  167. 3816,ACA,ACA
  168. 3819,TAA,TGA
  169. </code></pre>
  170. <p>There is also an equivalency table that shows which codons can be
  171. interchanged without changing the amino acid output. Please find this in
  172. <a href="https://github.com/berthubert/bnt162b2/blob/master/codon-table-grouped.csv" target="_blank">codon-table-grouped.csv</a>.
  173. There is also a visual version
  174. <a href="https://en.wikipedia.org/wiki/DNA_and_RNA_codon_tables#Standard_DNA_codon_table" target="_blank">here</a>.</p>
  175. <h2 id="a-sample-algorithm">A sample algorithm</h2>
  176. <p>On the <a href="https://github.com/berthubert/bnt162b2" target="_blank">GitHub repository</a> you can
  177. find
  178. <a href="https://github.com/berthubert/bnt162b2/blob/master/3rd-gc.go" target="_blank">3rd-gc.go</a>
  179. (and
  180. <a href="https://github.com/berthubert/bnt162b2/blob/master/3rd-gc.py" target="_blank">3rd-gc.py</a>).</p>
  181. <p>These implement a simple strategy that works like this:</p>
  182. <ul>
  183. <li>If a virus codon already ended on G or C, copy it to the vaccine mRNA</li>
  184. <li>If not, replace last nucleotide in codon by a G, see if the amino acid
  185. still matches, if so, copy to the vaccine mRNA</li>
  186. <li>Try the same with a C</li>
  187. <li>Otherwise copy as is</li>
  188. </ul>
  189. <p>Or in <code>golang</code>:</p>
  190. <pre><code>// base case, don't do anything
  191. our = vir
  192. // don't do anything if codon ends on G or C already
  193. if(vir[2] == 'G' || vir[2] =='C') {
  194. fmt.Printf("Codon ended on G or C already, not doing anything.")
  195. } else {
  196. prop = vir[:2]+"G"
  197. fmt.Printf("Attempting G substitution, new candidate '%s'. ", prop)
  198. if(c2s[vir] == c2s[prop]) {
  199. fmt.Printf("Amino acid still the same, done!")
  200. our = prop
  201. } else {
  202. fmt.Printf("Oops, amino acid changed. Trying C, new candidate '%s'. ", prop)
  203. prop = vir[:2]+"C"
  204. if(c2s[vir] == c2s[prop]) {
  205. fmt.Printf("Amino acid still the same, done!")
  206. our=prop
  207. }
  208. }
  209. }
  210. </code></pre>
  211. <p>This achieves a rather poor 53.1% match with the BioNTech RNA vaccine, but
  212. it is a start.</p>
  213. <p>When you design your algorithm, be sure to only base your choices on the
  214. virus RNA. Do not peek into the BioNTech RNA!</p>
  215. <p>If you have achieved a score beyond 53.1% please email a link to your code
  216. to bert@hubertnet.nl (or <a href="https://twitter.com/PowerDNS_Bert" target="_blank">@PowerDNS_Bert</a>)
  217. and I’ll put it on the leader board at the top of this page!</p>
  218. <h2 id="things-that-will-help">Things that will help</h2>
  219. <p>As with every form of reverse engineering or cryptanalysis, it helps to
  220. understand what we are looking at.</p>
  221. <h2 id="gc-ratio">GC ratio</h2>
  222. <p>We know that one goal of the ‘codon optimization’ is to get more <code>C</code>s and
  223. <code>G</code>s into the vaccine version of the RNA. However, there is also a limit to
  224. that. In DNA, which is also used to manufacture the vaccine, <code>G</code> and <code>C</code>
  225. bind together strongly, to the point that if you put too many of these
  226. ‘nucleotides’ in there, the DNA will no longer be replicated efficiently.</p>
  227. <p>So some modifications may actually happen to manage <em>down</em> the GC percentage of a
  228. stretch of DNA if it was getting too high.</p>
  229. <p>I <a href="https://twitter.com/PowerDNS_Bert/status/1344036143961169920" target="_blank">tweeted about this</a> earlier.</p>
  230. <h2 id="codon-optimization">Codon optimization</h2>
  231. <p>Some codons are rare in human DNA, or in certain cells. It may be that some
  232. codons are replaced by other ones simply because they are more frequently
  233. used by some cells.</p>
  234. <p>I <a href="https://twitter.com/PowerDNS_Bert/status/1344400081802448897" target="_blank">tweeted about this</a>
  235. earlier.</p>
  236. <h2 id="rna-folding">RNA folding</h2>
  237. <p>We’ve been looking at codons up to here. The RNA itself however does not
  238. know about codons, there are no markers that say where a codon begins and
  239. ends. The first codon on a protein however is always ATG (or AUG in RNA).</p>
  240. <p>RNA curls up into a shape. This shape might help evade the immune system or
  241. it might improve translation into amino acids. This only depends on the
  242. sequence of RNA nucleotides and not on specific codons.</p>
  243. <p>You can submit RNA sequences to <a href="http://rna.tbi.univie.ac.at/cgi-bin/RNAWebSuite/RNAfold.cgi" target="_blank">this server of the Institute for
  244. Theoretical Chemistry at the University of
  245. Vienna</a> and it
  246. will fold RNA for you. This is a very advanced server that does meticulous
  247. calculations.</p>
  248. <p>This <a href="https://en.wikipedia.org/wiki/Nucleic_acid_structure_prediction" target="_blank">Wikipedia
  249. page</a>
  250. describes how this works.</p>
  251. <p>It may be that some optimizations improve folding.</p>
  252. <p>I am also told that this paper by Moderna (another mRNA vaccine
  253. manufacturer) may be relevant:
  254. <a href="https://www.pnas.org/content/116/48/24075" target="_blank">mRNA structure regulates protein expression through changes in functional
  255. half-life</a>.</p>
  256. </article>
  257. <hr>
  258. <footer>
  259. <p>
  260. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  261. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-home"></use>
  262. </svg> Accueil</a> •
  263. <a href="/david/log/" title="Accès au flux RSS"><svg class="icon icon-rss2">
  264. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-rss2"></use>
  265. </svg> Suivre</a> •
  266. <a href="http://larlet.com" title="Go to my English profile" data-instant><svg class="icon icon-user-tie">
  267. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-user-tie"></use>
  268. </svg> Pro</a> •
  269. <a href="mailto:david%40larlet.fr" title="Envoyer un courriel"><svg class="icon icon-mail">
  270. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-mail"></use>
  271. </svg> Email</a> •
  272. <abbr class="nowrap" title="Hébergeur : Alwaysdata, 62 rue Tiquetonne 75002 Paris, +33184162340"><svg class="icon icon-hammer2">
  273. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-hammer2"></use>
  274. </svg> Légal</abbr>
  275. </p>
  276. <template id="theme-selector">
  277. <form>
  278. <fieldset>
  279. <legend><svg class="icon icon-brightness-contrast">
  280. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-brightness-contrast"></use>
  281. </svg> Thème</legend>
  282. <label>
  283. <input type="radio" value="auto" name="chosen-color-scheme" checked> Auto
  284. </label>
  285. <label>
  286. <input type="radio" value="dark" name="chosen-color-scheme"> Foncé
  287. </label>
  288. <label>
  289. <input type="radio" value="light" name="chosen-color-scheme"> Clair
  290. </label>
  291. </fieldset>
  292. </form>
  293. </template>
  294. </footer>
  295. <script src="/static/david/js/instantpage-5.1.0.min.js" type="module"></script>
  296. <script>
  /**
   * Swap the theme-selector `<template>` for the form it contains and wire
   * the form up: a change applies the chosen theme and saves it under the
   * `theme` storage key; the previously saved choice, if any, is pre-checked.
   *
   * @param {string} templateName - CSS selector of the `<template>` element.
   */
  function loadThemeForm(templateName) {
    const themeSelectorTemplate = document.querySelector(templateName)
    if (!themeSelectorTemplate) {
      // Nothing to do when the template is absent (e.g. already replaced).
      return
    }
    const form = themeSelectorTemplate.content.firstElementChild
    themeSelectorTemplate.replaceWith(form)

    form.addEventListener('change', (e) => {
      const chosenColorScheme = e.target.value
      // Apply the theme first so that switching still works when the
      // choice cannot be persisted.
      toggleTheme(chosenColorScheme)
      try {
        localStorage.setItem('theme', chosenColorScheme)
      } catch (error) {
        // Persisting is best effort: storage may be denied or full.
      }
    })

    let selectedTheme = null
    try {
      selectedTheme = localStorage.getItem('theme')
    } catch (error) {
      // Storage unavailable: keep the default checked option.
      return
    }
    if (!selectedTheme || selectedTheme === 'undefined') {
      return
    }
    // Compare attribute values instead of building a selector from the
    // stored string: an unexpected value then simply matches nothing
    // rather than throwing (invalid selector or `null` dereference).
    const matchingInput = Array.from(form.querySelectorAll('[value]')).find(
      (candidate) => candidate.getAttribute('value') === selectedTheme
    )
    if (matchingInput) {
      matchingInput.checked = true
    }
  }
  // Media condition whose rules are hoisted out of their `@media` block.
  const prefersColorSchemeDark = '(prefers-color-scheme: dark)'

  // Once everything is loaded, copy every rule found inside a
  // `@media (prefers-color-scheme: dark)` block to the top level of its
  // style sheet, then show the theme selector if at least one rule was copied.
  window.addEventListener('load', () => {
    let hasDarkRules = false
    for (const styleSheet of Array.from(document.styleSheets)) {
      let sheetRules
      try {
        sheetRules = styleSheet.cssRules
      } catch (error) {
        // Reading `cssRules` throws on style sheets the page is not allowed
        // to inspect (cross-origin); skip those instead of aborting.
        continue
      }
      let mediaRules = []
      for (const cssRule of sheetRules) {
        if (cssRule.type !== CSSRule.MEDIA_RULE) {
          continue
        }
        // WARNING: Safari does not have/supports `conditionText`, so fall
        // back to the rule's media list text. The previous fallback tested
        // `cssText.startsWith(...)`, which never matches (`cssText` begins
        // with `@media`) and let rules from every media block through.
        const condition = cssRule.conditionText || cssRule.media.mediaText
        if (condition !== prefersColorSchemeDark) {
          continue
        }
        mediaRules = mediaRules.concat(Array.from(cssRule.cssRules))
      }
      // WARNING: do not try to insert a Rule to a styleSheet you are
      // currently iterating on, otherwise the browser will be stuck
      // in a infinite loop…
      for (const mediaRule of mediaRules) {
        styleSheet.insertRule(mediaRule.cssText)
        hasDarkRules = true
      }
    }
    if (hasDarkRules) {
      loadThemeForm('#theme-selector')
    }
  })
  344. </script>
  345. </body>
  346. </html>