A place to cache linked articles (think custom and personal wayback machine)
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

index.html 35KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718
  1. <!doctype html><!-- This is a valid HTML5 document. -->
  2. <!-- Screen readers, SEO, extensions and so on. -->
  3. <html lang="fr">
  4. <!-- Has to be within the first 1024 bytes, hence before the `title` element
  5. See: https://www.w3.org/TR/2012/CR-html5-20121217/document-metadata.html#charset -->
  6. <meta charset="utf-8">
  7. <!-- Why no `X-UA-Compatible` meta: https://stackoverflow.com/a/6771584 -->
  8. <!-- The viewport meta is quite crowded and we are responsible for that.
  9. See: https://codepen.io/tigt/post/meta-viewport-for-2015 -->
  10. <meta name="viewport" content="width=device-width,initial-scale=1">
  11. <!-- Required to make a valid HTML5 document. -->
  12. <title>Reverse Engineering the source code of the BioNTech/Pfizer SARS-CoV-2 Vaccine (archive) — David Larlet</title>
  13. <meta name="description" content="Publication mise en cache pour en conserver une trace.">
  14. <!-- That good ol' feed, subscribe :). -->
  15. <link rel="alternate" type="application/atom+xml" title="Feed" href="/david/log/">
  16. <!-- Generated from https://realfavicongenerator.net/ such a mess. -->
  17. <link rel="apple-touch-icon" sizes="180x180" href="/static/david/icons2/apple-touch-icon.png">
  18. <link rel="icon" type="image/png" sizes="32x32" href="/static/david/icons2/favicon-32x32.png">
  19. <link rel="icon" type="image/png" sizes="16x16" href="/static/david/icons2/favicon-16x16.png">
  20. <link rel="manifest" href="/static/david/icons2/site.webmanifest">
  21. <link rel="mask-icon" href="/static/david/icons2/safari-pinned-tab.svg" color="#07486c">
  22. <link rel="shortcut icon" href="/static/david/icons2/favicon.ico">
  23. <meta name="msapplication-TileColor" content="#f7f7f7">
  24. <meta name="msapplication-config" content="/static/david/icons2/browserconfig.xml">
  25. <meta name="theme-color" content="#f7f7f7" media="(prefers-color-scheme: light)">
  26. <meta name="theme-color" content="#272727" media="(prefers-color-scheme: dark)">
  27. <!-- Documented, feel free to shoot an email. -->
  28. <link rel="stylesheet" href="/static/david/css/style_2021-01-20.css">
  29. <!-- See https://www.zachleat.com/web/comprehensive-webfonts/ for the trade-off. -->
  30. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  31. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  32. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  33. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  34. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  35. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  36. <script>
  37. function toggleTheme(themeName) { // Force the 'dark' or 'light' theme on the root element; any other value clears both overrides.
  38. document.documentElement.classList.toggle(
  39. 'forced-dark',
  40. themeName === 'dark' // force flag of toggle(): true adds the class, false removes it
  41. )
  42. document.documentElement.classList.toggle(
  43. 'forced-light',
  44. themeName === 'light' // same force-flag pattern for the light override
  45. )
  46. }
  47. const selectedTheme = localStorage.getItem('theme') // read the stored theme choice; getItem() yields null when the key is absent
  48. if (selectedTheme !== 'undefined') { // NOTE(review): compares with the string 'undefined', so a null (missing key) still reaches toggleTheme and clears both classes; presumably the writer stores that literal when no theme is chosen - confirm
  49. toggleTheme(selectedTheme)
  50. }
  51. </script>
  52. <meta name="robots" content="noindex, nofollow">
  53. <meta content="origin-when-cross-origin" name="referrer">
  54. <!-- Canonical URL for SEO purposes -->
  55. <link rel="canonical" href="https://berthub.eu/articles/posts/reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/">
  56. <body class="remarkdown h1-underline h2-underline h3-underline em-underscore hr-center ul-star pre-tick" data-instant-intensity="viewport-all">
  57. <article>
  58. <header>
  59. <h1>Reverse Engineering the source code of the BioNTech/Pfizer SARS-CoV-2 Vaccine</h1>
  60. </header>
  61. <nav>
  62. <p class="center">
  63. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  64. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-home"></use>
  65. </svg> Accueil</a> •
  66. <a href="https://berthub.eu/articles/posts/reverse-engineering-source-code-of-the-biontech-pfizer-vaccine/" title="Lien vers le contenu original">Source originale</a>
  67. </p>
  68. </nav>
  69. <hr>
  70. <p>Welcome! In this post, we’ll be taking a character-by-character look at the
  71. source code of the BioNTech/Pfizer SARS-CoV-2 mRNA vaccine.</p>
  72. <blockquote>
  73. <p><em>I want to thank the large cast of people who spent time previewing this
  74. article for legibility and correctness. All mistakes remain mine though,
  75. but I would love to hear about them quickly at bert@hubertnet.nl or
  76. <a href="https://twitter.com/PowerDNS_Bert" target="_blank">@PowerDNS_Bert</a></em></p>
  77. </blockquote>
  78. <p>Now, these words may be somewhat jarring - the vaccine is a liquid that gets
  79. injected in your arm. How can we talk about source code?</p>
  80. <p>This is a good question, so let’s start off with a small part of the very
  81. source code of the BioNTech/Pfizer vaccine, also known as
  82. <a href="https://en.wikipedia.org/wiki/Tozinameran" target="_blank">BNT162b2</a>, also
  83. known as Tozinameran <a href="https://twitter.com/PowerDNS_Bert/status/1342109138965422083" target="_blank">also known as
  84. Comirnaty</a>.</p>
  85. <p></p>
  86. <p><center></p>
  87. <figure>
  88. <img src="/articles/bnt162b2.png" alt="First 500 characters of the BNT162b2 mRNA. Source: World Health Organization"> <figcaption>
  89. <p>First 500 characters of the BNT162b2 mRNA. Source: <a href="https://mednet-communities.net/inn/db/media/docs/11889.doc" target="_blank">World Health Organization</a></p>
  90. </figcaption>
  91. </figure>
  92. <p></center></p>
  93. <p>The BNT162b2 mRNA vaccine has this digital code at its heart. It is 4284
  94. characters long, so it would fit in a bunch of tweets. At the very
  95. beginning of the vaccine production process, someone uploaded this code to a
  96. DNA printer (yes), which then converted the bytes on disk to actual DNA
  97. molecules.</p>
  98. <p></p>
  99. <p><center></p>
  100. <figure>
  101. <img src="/articles/bioxp-3200.jpg" alt="A Codex DNA BioXp 3200 DNA printer"> <figcaption>
  102. <p>A <a href="https://codexdna.com/products/bioxp-system/" target="_blank">Codex DNA</a> BioXp 3200 DNA printer</p>
  103. </figcaption>
  104. </figure>
  105. <p></center></p>
  106. <p>Out of such a machine come tiny amounts of DNA, which after a lot of
  107. biological and chemical processing end up as RNA (more about which later) in
  108. the vaccine vial. A 30 microgram dose turns out to actually contain 30
  109. micrograms of RNA. In addition, there is a clever lipid (fatty) packaging
  110. system that gets the mRNA into our cells.</p>
  111. <p>RNA is the volatile ‘working memory’ version of DNA. DNA is like the flash
  112. drive storage of biology. DNA is very durable, internally redundant and
  113. very reliable. But much like computers do not execute code directly from a
  114. flash drive, before something happens, code gets copied to a faster,
  115. more versatile yet far more fragile system.</p>
  116. <p>For computers, this is RAM, for biology it is RNA. The resemblance is
  117. striking. Unlike flash memory, RAM degrades very quickly unless lovingly
  118. tended to. The reason the Pfizer/BioNTech mRNA vaccine must be stored in the
  119. deepest of deep freezers is the same: RNA is a fragile flower.</p>
  120. <p>Each RNA character weighs on the order of 0.53·10⁻²¹ grams, meaning
  121. there are around 6·10¹⁶ characters in a single 30 microgram vaccine dose.
  122. Expressed in bytes, this is around 14 petabytes, although it must be said
  123. this consists of around <a href="https://docs.google.com/spreadsheets/d/1vc6p9IXQVRpVQntcI1tCdSMLNDuT8fl8rags0gDxMZA/edit?usp=sharing" target="_blank">13,000 billion
  124. repetitions</a> of the same 4284
  125. characters. The actual informational content of the vaccine is just over a
  126. kilobyte. <a href="https://www.ncbi.nlm.nih.gov/projects/sviewer/?id=NC_045512&amp;tracks=%5Bkey:sequence_track,name:Sequence,display_name:Sequence,id:STD649220238,annots:Sequence,ShowLabel:false,ColorGaps:false,shown:true,order:1%5D%5Bkey:gene_model_track,name:Genes,display_name:Genes,id:STD3194982005,annots:Unnamed,Options:ShowAllButGenes,CDSProductFeats:true,NtRuler:true,AaRuler:true,HighlightMode:2,ShowLabel:true,shown:true,order:9%5D&amp;v=1:29903&amp;c=null&amp;select=null&amp;slim=0" target="_blank">SARS-CoV-2 itself</a> weighs in at around 7.5 kilobytes.</p>
  127. <blockquote>
  128. <p>Update: In the original post these numbers were off. <a href="https://docs.google.com/spreadsheets/d/1vc6p9IXQVRpVQntcI1tCdSMLNDuT8fl8rags0gDxMZA/edit?usp=sharing" target="_blank">Here is a
  129. spreadsheet</a>
  130. with the correct calculations.</p>
  131. </blockquote>
  132. <h2 id="the-briefest-bit-of-background">The briefest bit of background</h2>
  133. <p>DNA is a digital code. Unlike computers, which use 0 and 1, life uses A, C, G
  134. and U/T (the ‘nucleotides’, ‘nucleosides’ or ‘bases’).</p>
  135. <p>In computers we store the 0 and 1 as the presence or absence of a charge, or
  136. as a current, as a magnetic transition, or as a voltage, or as a modulation
  137. of a signal, or as a change in reflectivity. Or in short, the 0 and 1 are
  138. not some kind of abstract concept - they live as electrons and in many other
  139. physical embodiments.</p>
  140. <p>In nature, A, C, G and U/T are molecules, stored as chains in DNA (or RNA).</p>
  141. <p>In computers, we group 8 bits into a byte, and the byte is the typical unit
  142. of data being processed.</p>
  143. <p>Nature groups 3 nucleotides into a codon, and this codon is the typical unit
  144. of processing. A codon contains 6 bits of information (2 bits per DNA
  145. character, 3 characters = 6 bits. This means 2⁶ = 64 different codon values).</p>
  146. <p>Pretty digital so far. When in doubt, <a href="https://mednet-communities.net/inn/db/media/docs/11889.doc" target="_blank">head to the WHO
  147. document</a> with the
  148. digital code to see for yourself.</p>
  149. <blockquote>
  150. <p><em>Some further reading is <a href="https://berthub.eu/articles/posts/what-is-life/" target="_blank">available
  151. here</a> - this link (‘What
  152. is life’) might help make sense of the rest of this page. Or, if you like
  153. video, I have <a href="https://berthub.eu/dna" target="_blank">two hours for you</a>.</em></p>
  154. </blockquote>
  155. <h2 id="so-what-does-that-code-do">So what does that code DO?</h2>
  156. <p>The idea of a vaccine is to teach our immune system how to fight a pathogen,
  157. without us actually getting ill. Historically this has been done by
  158. injecting a weakened or incapacitated (attenuated) virus, plus an ‘adjuvant’
  159. to scare our immune system into action. This was a decidedly analogue
  160. technique involving billions of eggs (or insects). It also required a lot
  161. of luck and loads of time. Sometimes a different (unrelated) virus was also
  162. used.</p>
  163. <p>An mRNA vaccine achieves the same thing (‘educate our immune system’) but in
  164. a laser-like way. And I mean this in both senses - very narrow but also
  165. very powerful.</p>
  166. <p>So here is how it works. The injection contains volatile genetic material
  167. that describes the famous SARS-CoV-2 ‘Spike’ protein. Through clever
  168. chemical means, the vaccine manages to get this genetic material into some of
  169. our cells.</p>
  170. <p>These then dutifully start producing SARS-CoV-2 Spike proteins in large
  171. enough quantities that our immune system springs into action. Confronted
  172. with Spike proteins, and (importantly) tell-tale signs that cells have been
  173. taken over, our immune system develops a powerful response against multiple
  174. aspects of the Spike protein AND the production process.</p>
  175. <p>And this is what gets us to the 95% effective vaccine.</p>
  176. <h2 id="the-source-code">The source code!</h2>
  177. <p><a href="https://youtu.be/jp0opnxQ4rY?t=8" target="_blank">Let’s start at the very beginning, a very good place
  178. to start</a>. The WHO document has this
  179. helpful picture:</p>
  180. <p></p>
  181. <p><center></p>
  182. <figure>
  183. <img src="/articles/vaccine-toc.png">
  184. </figure>
  185. <p></center></p>
  186. <p>This is a sort of table of contents. We’ll start with the ‘cap’, actually
  187. depicted as a little hat.</p>
  188. <p>Much like you can’t just plonk opcodes in a file on a computer and run it,
  189. the biological operating system requires headers, has linkers and things
  190. like calling conventions.</p>
  191. <p>The code of the vaccine starts with the following two nucleotides:</p>
  192. <pre><code>GA
  193. </code></pre>
  194. <p>This can be compared very much to every <a href="https://en.wikipedia.org/wiki/DOS_MZ_executable" target="_blank">DOS and Windows executable starting
  195. with MZ</a>, or UNIX scripts starting with
  196. <a href="https://en.wikipedia.org/wiki/Shebang_(Unix)" target="_blank"><code>#!</code></a>. In both life and
  197. operating systems, these two characters are not executed in any way. But
  198. they have to be there because otherwise nothing happens.</p>
  199. <p>The mRNA ‘cap’ <a href="https://en.wikipedia.org/wiki/Five-prime_cap#Function" target="_blank">has a number of
  200. functions</a>. For one, it marks code as coming
  201. from the nucleus. In our case of course it doesn’t, our code comes from a
  202. vaccination. But we don’t need to tell the cell that. The cap makes our code
  203. look legit, which protects it from destruction.</p>
  204. <p>The initial two <code>GA</code> nucleotides are also chemically slightly different from
  205. the rest of the RNA. In this sense, the <code>GA</code> has some out-of-band
  206. signaling on it.</p>
  207. <h2 id="the-five-prime-untranslated-region">The “five-prime untranslated region”</h2>
  208. <p>Some lingo here. RNA molecules can only be read in one direction.
  209. Confusingly, the part where the reading begins is called the 5’ or
  210. ‘five-prime’. The reading stops at the 3’ or three-prime end.</p>
  211. <p>Life consists of proteins (or things made by proteins). And these proteins
  212. are described in RNA. When RNA gets converted into proteins, this is called
  213. translation.</p>
  214. <p>Here we have the 5’ untranslated region (‘UTR’), so this bit does not end up
  215. in the protein:</p>
  216. <pre><code>GAAΨAAACΨAGΨAΨΨCΨΨCΨGGΨCCCCACAGACΨCAGAGAGAACCCGCCACC
  217. </code></pre>
  218. <p>Here we encounter our first surprise. The normal RNA characters are A, C, G
  219. and U. U is also known as ‘T’ in DNA. But here we find a Ψ, what is going
  220. on?</p>
  221. <p>This is one of the exceptionally clever bits about the vaccine. Our body
  222. runs a powerful antivirus system (“the original one”). For this reason,
  223. cells are extremely unenthusiastic about foreign RNA and try very hard to
  224. destroy it before it does anything.</p>
  225. <p>This is somewhat of a problem for our vaccine - it needs to sneak past our
  226. immune system. Over many years of experimentation, it was found that if the
  227. U in RNA is replaced by a slightly modified molecule, our immune system
  228. loses interest. For real.</p>
  229. <p>So in the BioNTech/Pfizer vaccine, every U has been replaced by
  230. 1-methyl-3’-pseudouridylyl, denoted by Ψ. The really clever bit is that
  231. although this replacement Ψ placates (calms) our immune system, it is
  232. accepted as a normal U by relevant parts of the cell.</p>
  233. <p>In computer security we also know this trick - it sometimes is possible to
  234. transmit a slightly corrupted version of a message that confuses firewalls and
  235. security solutions, but that is still accepted by the backend servers -
  236. which can then get hacked.</p>
  237. <p>We are now reaping the benefits of fundamental scientific research performed
  238. in the past. The
  239. <a href="https://twitter.com/PennMedicine/status/1341766354232365059" target="_blank">discoverers</a>
  240. of this Ψ technique had to fight to get
  241. <a href="https://www.statnews.com/2020/11/10/the-story-of-mrna-how-a-once-dismissed-idea-became-a-leading-technology-in-the-covid-vaccine-race/" target="_blank">their</a>
  242. work funded and then accepted. We should all be very grateful, and I am sure
  243. the <a href="https://twitter.com/PowerDNS_Bert/status/1329861047168225281" target="_blank">Nobel prizes will arrive in due
  244. course</a>.</p>
  245. <blockquote>
  246. <p>Many people have asked, could viruses also use the Ψ technique to beat our
  247. immune systems? In short, this is extremely unlikely. Life simply does
  248. not have the machinery to build 1-methyl-3’-pseudouridylyl nucleotides.
  249. Viruses rely on the machinery of life to reproduce themselves, and this
  250. facility is simply not there. The mRNA vaccines quickly degrade in the
  251. human body, and there is no possibility of the Ψ-modified RNA
  252. replicating with the Ψ still in there. “<a href="https://www.deplatformdisease.com/blog/no-really-mrna-vaccines-are-not-going-to-affect-your-dna" target="_blank">No, Really, mRNA Vaccines Are Not Going To Affect Your
  253. DNA</a>”
  254. is also a good read.</p>
  255. </blockquote>
  256. <p>Ok, back to the 5’ UTR. What do these 51 characters do? As with everything in
  257. nature, almost nothing has one clear function.</p>
  258. <p>When our cells need to <em>translate</em> RNA into proteins, this is done using a
  259. machine called the ribosome. The ribosome is like a 3D printer for
  260. proteins. It ingests a strand of RNA and based on that it emits a string of
  261. amino acids, which then fold into a protein.</p>
  262. <p></p>
  263. <p><center></p>
  264. <video controls loop>
  265. <source src="/articles/protein-short.mp4" type="video/mp4">
  266. </source></video>
  267. <p><br>
  268. Source: <a href="https://commons.wikimedia.org/wiki/File:Protein_translation.gif" target="_blank">Wikipedia user Bensaccount</a>
  269. </center></p>
  270. <p>This is what we see happening above. The black ribbon at the bottom is RNA.
  271. The ribbon appearing in the green bit is the protein being formed. The
  272. things flying in and out are amino acids plus adaptors to make them fit on
  273. RNA.</p>
  274. <p>This ribosome needs to physically sit on the RNA strand for it to get to
  275. work. Once seated, it can start forming proteins based on further RNA it
  276. ingests. From this, you can imagine that it can’t yet read the parts where
  277. it lands on first. This is just one of the functions of the UTR: the
  278. ribosome landing zone. The UTR provides ‘lead-in’.</p>
  279. <p>In addition to this, the UTR also contains metadata: when should translation
  280. happen? And how much? For the vaccine, they took the most ‘right now’ UTR
  281. they could find, taken from the <a href="https://www.tandfonline.com/doi/full/10.1080/15476286.2018.1450054" target="_blank">alpha globin
  282. gene</a>.
  283. This gene is known to robustly produce a lot of proteins. In previous
  284. years, scientists had already found ways to optimize this UTR even further
  285. (according to the WHO document), so this is not quite the alpha globin UTR.
  286. It is better.</p>
  287. <h2 id="the-s-glycoprotein-signal-peptide">The S glycoprotein signal peptide</h2>
  288. <p>As noted, the goal of the vaccine is to get the cell to produce copious
  289. amounts of the Spike protein of SARS-CoV-2. Up to this point, we have mostly
  290. encountered metadata and “calling convention” stuff in the vaccine source
  291. code. But now we enter the actual viral protein territory.</p>
  292. <p>We still have one layer of metadata to go however. Once the ribosome (from the
  293. splendid animation above) has made a protein, that protein still needs to go
  294. somewhere. This is encoded in the “S glycoprotein signal peptide (extended leader
  295. sequence)”.</p>
  296. <p>The way to see this is that at the beginning of the protein there is a sort
  297. of address label - encoded as part of the protein itself. In this specific
  298. case, the signal peptide says that this protein should exit the cell via the
  299. “endoplasmic reticulum”. Even Star Trek lingo is not as fancy as this!</p>
  300. <p>The “signal peptide” is not very long, but when we look at the code, there
  301. are differences between the viral and vaccine RNA:</p>
  302. <p>(Note that for comparison purposes, I have replaced the fancy modified Ψ by a
  303. regular RNA U)</p>
  304. <pre><code>           3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3
  305. Virus:   AUG UUU GUU UUU CUU GUU UUA UUG CCA CUA GUC UCU AGU CAG UGU GUU
  306. Vaccine: AUG UUC GUG UUC CUG GUG CUG CUG CCU CUG GUG UCC AGC CAG UGU GUG
  307.                !   !   !   !   ! ! ! !     !   !   !   !   !           !
  308. </code></pre>
  309. <p>So what is going on? I have not accidentally listed the RNA in groups of 3
  310. letters. Three RNA characters make up a codon. And every codon encodes for a
  311. specific amino acid. The signal peptide in the vaccine consists of <em>exactly</em>
  312. the same amino acids as in the virus itself.</p>
  313. <p>So how come the RNA is different?</p>
  314. <p>There are 4³=64 different codons, since there are 4 RNA characters, and
  315. there are three of them in a codon. Yet there are only 20 different
  316. amino acids. This means that multiple codons encode for the same amino acid.</p>
  317. <p>Life uses the following nearly universal table for mapping RNA codons to
  318. amino acids:</p>
  319. <p></p>
  320. <p><center></p>
  321. <figure>
  322. <img src="/articles/rna-codon-table.png" alt="The RNA codon table (Wikipedia)"> <figcaption>
  323. <p><a href="https://en.wikipedia.org/wiki/DNA_and_RNA_codon_tables" target="_blank">The RNA codon table</a> (Wikipedia)</p>
  324. </figcaption>
  325. </figure>
  326. <p></center></p>
  327. <p>In this table, we can see that the modifications in the vaccine (UUU -&gt;
  328. UUC) are all <em>synonymous</em>. The vaccine RNA code is different, but the same
  329. amino acids and the same protein come out.</p>
  330. <p>If we look closely, we see that the majority of the changes happen in the
  331. third codon position, noted with a ‘3’ above. And if we check the universal
  332. codon table, we see that this third position indeed often does not matter
  333. for which amino acid is produced.</p>
  334. <p>So, the changes are synonymous, but then why are they there? Looking
  335. closely, we see that all changes <em>except one</em> lead to more Cs and Gs.</p>
  336. <p>So why would you do that? As noted above, our immune system takes a very dim
  337. view of ‘exogenous’ RNA, RNA code coming from outside the cell. To evade
  338. detection, the ‘U’ in the RNA was already replaced by a Ψ.</p>
  339. <p>However, it turns out that RNA with <a href="https://www.nature.com/articles/nrd.2017.243" target="_blank">a higher
  340. amount</a> of Gs and Cs is
  341. also <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1463026/" target="_blank">converted more efficiently into
  342. proteins</a>.</p>
  343. <p>And this has been achieved in the vaccine RNA by replacing many characters
  344. with Gs and Cs wherever this was possible.</p>
  345. <blockquote>
  346. <p>I’m slightly fascinated by the <em>one</em> change that did not lead to an
  347. additional C or G, the CCA -&gt; CCU modification. If anyone knows the reason,
  348. please let me know! Note that I’m aware that some codons are more common
  349. than others in the human genome, but <a href="https://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1006024" target="_blank">I also read that this does not
  350. influence translation speed a
  351. lot</a>.</p>
  352. </blockquote>
  353. <h2 id="the-actual-spike-protein">The actual Spike protein</h2>
  354. <p>The next 3777 characters of the vaccine RNA are similarly ‘codon optimized’
  355. to add a lot of C’s and G’s. In the interest of space I won’t list all
  356. the code here, but we are going to zoom in on one exceptionally special
  357. bit. This is the bit that makes it work, the part that will actually help us
  358. return to life as normal:</p>
  359. <pre><code>                  *   *
  360.           L   D   K   V   E   A   E   V   Q   I   D   R   L   I   T   G
  361. Virus:   CUU GAC AAA GUU GAG GCU GAA GUG CAA AUU GAU AGG UUG AUC ACA GGC
  362. Vaccine: CUG GAC CCU CCU GAG GCC GAG GUG CAG AUC GAC AGA CUG AUC ACA GGC
  363.           L   D   P   P   E   A   E   V   Q   I   D   R   L   I   T   G
  364.            !     !!! !!        !   !       !   !   !   ! !
  365. </code></pre>
  366. <p>Here we see the usual synonymous RNA changes. For example, in the first
  367. codon we see that CUU is changed into CUG. This adds another ‘G’ to the
  368. vaccine, which we know helps enhance protein production. Both CUU
  369. and CUG encode for the amino acid ‘L’ or Leucine, so nothing changed in the
  370. protein.</p>
  371. <p>When we compare the entire Spike protein in the vaccine, all changes are
  372. synonymous like this.. except for two, and this is what we see here.</p>
  373. <p>The third and fourth codons above represent actual changes. The K and V
  374. amino acids there are both replaced by ‘P’ or Proline. For ‘K’ this required
  375. three changes (‘!!!’) and for ‘V’ it required only two (‘!!’).</p>
  376. <p><strong>It turns out that these two changes enhance the vaccine efficiency
  377. enormously</strong>.</p>
  378. <p>So what is happening here? If you look at a real SARS-CoV-2 particle, you
  379. can see the Spike protein as, well, a bunch of spikes:</p>
  380. <p></p>
  381. <p><center></p>
  382. <figure>
  383. <img src="/articles/sars-em.jpg" alt="SARS virus particles (Wikipedia)"> <figcaption>
  384. <p><a href="https://en.wikipedia.org/wiki/Severe_acute_respiratory_syndrome_coronavirus" target="_blank">SARS virus particles</a> (Wikipedia)</p>
  385. </figcaption>
  386. </figure>
  387. <p></center></p>
  388. <p>The spikes are mounted on the virus body (‘the nucleocapsid protein’). But
  389. the thing is, our vaccine is only generating the spikes itself, and we’re
  390. not mounting them on any kind of virus body.</p>
  391. <p>It turns out that, unmodified, freestanding Spike proteins collapse into a
  392. different structure. If injected as a vaccine, this would indeed cause our
  393. bodies to develop immunity.. but only against the collapsed spike protein.</p>
  394. <p>And the real SARS-CoV-2 shows up with the spiky Spike. The vaccine would not
  395. work very well in that case.</p>
  396. <p>So what to do? In <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5584442/" target="_blank">2017 it was described how putting a double Proline
  397. substitution in just the right
  398. place</a> would make the
  399. SARS-CoV-1 and MERS
  400. S proteins take up their ‘pre-fusion’ configuration, even without being part of
  401. the whole virus. This works <a href="https://cen.acs.org/pharmaceuticals/vaccines/tiny-tweak-behind-COVID-19/98/i38" target="_blank">because Proline is a very rigid amino
  402. acid</a>. It
  403. acts as a kind of splint, stabilising the protein in the state we need to
  404. show to the immune system.</p>
  405. <p>The <a href="https://twitter.com/goodwish916" target="_blank">people</a> that
  406. <a href="https://twitter.com/KizzyPhD" target="_blank">discovered</a> this should be walking
  407. around high-fiving themselves incessantly. Unbearable amounts of smugness
  408. should be emanating from them. <a href="https://twitter.com/McLellan_Lab/status/1291077489566142464" target="_blank">And it would all be well
  409. deserved</a>.</p>
  410. <blockquote>
  411. <p>Update! I have been contacted by the <a href="https://twitter.com/McLellan_Lab/status/1291077489566142464" target="_blank">McLellan
  412. lab</a>, one of the
  413. groups behind the Proline discovery. They tell me the high-fiving is
  414. subdued because of the ongoing pandemic, but they are pleased to have
  415. contributed to the vaccines. They also stress the importance of many other
  416. groups, workers and volunteers.</p>
  417. </blockquote>
  418. <h2 id="the-end-of-the-protein-next-steps">The end of the protein, next steps</h2>
  419. <p>If we scroll through the rest of the source code, we encounter some small
  420. modifications at the end of the Spike protein:</p>
  421. <pre><code>          V   L   K   G   V   K   L   H   Y   T   s
  422. Virus:   GUG CUC AAA GGA GUC AAA UUA CAU UAC ACA UAA
  423. Vaccine: GUG CUG AAG GGC GUG AAA CUG CAC UAC ACA UGA UGA
  424.           V   L   K   G   V   K   L   H   Y   T   s   s
  425.                !   !   !   !     ! !   !          !
  426. </code></pre>
  427. <p>At the end of a protein we find a ‘stop’ codon, denoted here by a lowercase
  428. ‘s’. This is a polite way of saying that the protein should end here. The
  429. original virus uses the UAA stop codon, the vaccine uses two UGA stop
  430. codons, perhaps just for good measure.</p>
  431. <h2 id="the-3-untranslated-region">The 3’ Untranslated Region</h2>
  432. <p>Much like the ribosome needed some lead-in at the 5’ end, where we found the
  433. ‘five prime untranslated region’, at the end of a protein coding region we find a similar
  434. construct called the 3’ UTR.</p>
  435. <p>Many words could be written about the 3’ UTR, but here I quote <a href="https://en.wikipedia.org/wiki/Three_prime_untranslated_region" target="_blank">what the
  436. Wikipedia
  437. says</a>: “The 3’-untranslated region plays a crucial role in gene
  438. expression by influencing the localization, stability, export, and
  439. translation efficiency of an mRNA .. <strong>despite our current understanding of
  440. 3’-UTRs, they are still relative mysteries</strong>”.</p>
  441. <p>What we do know is that certain 3’-UTRs are very successful at promoting
  442. protein expression. According to the WHO document, the BioNTech/Pfizer
  443. vaccine 3’-UTR was picked from “the amino-terminal enhancer of split (AES)
  444. mRNA and the mitochondrial encoded 12S ribosomal RNA to confer RNA stability
  445. and high total protein expression”. To which I say, well done.</p>
  446. <p></p>
  447. <center>
  448. <figure>
  449. <img src="/articles/vaccine.jpg">
  450. </figure>
  451. </center>
  452. <h2 id="the-aaaaaaaaaaaaaaaaaaaaaa-end-of-it-all">The AAAAAAAAAAAAAAAAAAAAAA end of it all</h2>
  453. <p>The very end of mRNA is polyadenylated. This is a fancy way of saying it
  454. ends on a lot of AAAAAAAAAAAAAAAAAAA. Even mRNA has had enough of 2020 it
  455. appears.</p>
  456. <p>mRNA can be reused many times, but as this happens, it also loses some of
  457. the A’s at the end. Once the A’s run out, the mRNA is no longer functional
  458. and gets discarded. In this way, the ‘poly-A’ tail is protection from
  459. degradation.</p>
  460. <p>Studies have been done to find out what the optimal number of A’s at the end
  461. is for mRNA vaccines. I read in the open literature that this peaked at 120
  462. or so.</p>
  463. <p>The BNT162b2 vaccine ends with:</p>
  464. <pre><code>                                     ****** ****
  465. UAGCAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAGCAUAU GACUAAAAAA AAAAAAAAAA
  466. AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAA
  467. </code></pre>
  468. <p>This is 30 A’s, then a “10 nucleotide linker” (GCAUAUGACU), followed by another 70
  469. A’s.</p>
  470. <p>There are various theories why this linker is there. Some people tell me it
  471. has to do with DNA plasmid stability; I have also received this from an
  472. actual expert:</p>
  473. <p>“The 10-nucleotide linker within the poly(A) tail makes it easier to stitch
  474. together the synthetic DNA fragments that become the template for transcribing
  475. the mRNA. It also reduces slipping by T7 RNA polymerase so that the
  476. transcribed mRNA is more uniform in length”.</p>
  477. <h2 id="summarising">Summarising</h2>
  478. <p>With this, we now know the exact mRNA contents of the BNT162b2 vaccine, and
  479. for most parts we understand why they are there:</p>
  480. <ul>
  481. <li>The CAP to make sure the RNA looks like regular mRNA</li>
  482. <li>A known successful and optimized 5’ untranslated region (UTR)</li>
  483. <li>A codon optimized signal peptide to send the Spike protein to the right
  484. place (copied 100% from the original virus)</li>
  485. <li>A codon optimized version of the original spike, with two ‘Proline’
  486. substitutions to make sure the protein appears in the right form</li>
  487. <li>A known successful and optimized 3’ untranslated region</li>
  488. <li>A slightly mysterious poly-A tail with an unexplained ‘linker’ in there</li>
  489. </ul>
  490. <p>The codon optimization adds a lot of G and C to the mRNA. Meanwhile, using Ψ
  491. (1-methyl-3’-pseudouridylyl) instead of U helps evade our immune system, so
  492. the mRNA stays around long enough so we can actually help train the immune
  493. system.</p>
  494. <h2 id="further-reading-viewing">Further reading/viewing</h2>
  495. <p>In 2017 I held a two hour presentation on DNA, which you can <a href="https://berthub.eu/dna" target="_blank">view
  496. here</a>. Like this page it is aimed at computer
  497. people.</p>
  498. <p>In addition, I’ve been maintaining a page on ‘<a href="https://berthub.eu/amazing-dna" target="_blank">DNA for
  499. programmers</a>’ since 2001.</p>
  500. <p>You might also enjoy <a href="https://berthub.eu/articles/posts/immune-system/" target="_blank">this introduction to our amazing immune
  501. system</a>.</p>
  502. <p>Finally, <a href="https://berthub.eu/articles" target="_blank">this listing of my blog posts</a> has quite some
  503. DNA, SARS-CoV-2 and COVID related material.</p>
  504. </article>
  505. <hr>
  506. <footer>
  507. <p>
  508. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  509. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-home"></use>
  510. </svg> Accueil</a> •
  511. <a href="/david/log/" title="Accès au flux RSS"><svg class="icon icon-rss2">
  512. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-rss2"></use>
  513. </svg> Suivre</a> •
  514. <a href="http://larlet.com" title="Go to my English profile" data-instant><svg class="icon icon-user-tie">
  515. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-user-tie"></use>
  516. </svg> Pro</a> •
  517. <a href="mailto:david%40larlet.fr" title="Envoyer un courriel"><svg class="icon icon-mail">
  518. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-mail"></use>
  519. </svg> Email</a> •
  520. <abbr class="nowrap" title="Hébergeur : Alwaysdata, 62 rue Tiquetonne 75002 Paris, +33184162340"><svg class="icon icon-hammer2">
  521. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-hammer2"></use>
  522. </svg> Légal</abbr>
  523. </p>
  524. <template id="theme-selector">
  525. <form>
  526. <fieldset>
  527. <legend><svg class="icon icon-brightness-contrast">
  528. <use xlink:href="/static/david/icons2/symbol-defs.svg#icon-brightness-contrast"></use>
  529. </svg> Thème</legend>
  530. <label>
  531. <input type="radio" value="auto" name="chosen-color-scheme" checked> Auto
  532. </label>
  533. <label>
  534. <input type="radio" value="dark" name="chosen-color-scheme"> Foncé
  535. </label>
  536. <label>
  537. <input type="radio" value="light" name="chosen-color-scheme"> Clair
  538. </label>
  539. </fieldset>
  540. </form>
  541. </template>
  542. </footer>
  543. <script src="/static/david/js/instantpage-5.1.0.min.js" type="module"></script>
  544. <script>
  /**
   * Replace the theme-selector `<template>` with the form it contains and wire
   * that form up.
   *
   * Every `change` event on the form stores the chosen value under the `theme`
   * key of `localStorage` and passes it to `toggleTheme()`. If a value is
   * already stored under that key, the control carrying the same `value`
   * attribute is checked.
   *
   * @param {string} templateName - CSS selector locating the `<template>` element.
   */
  function loadThemeForm(templateName) {
    const themeSelectorTemplate = document.querySelector(templateName)
    const form = themeSelectorTemplate.content.firstElementChild
    themeSelectorTemplate.replaceWith(form)
    form.addEventListener('change', (e) => {
      const chosenColorScheme = e.target.value
      localStorage.setItem('theme', chosenColorScheme)
      toggleTheme(chosenColorScheme)
    })
    const selectedTheme = localStorage.getItem('theme')
    if (selectedTheme && selectedTheme !== 'undefined') {
      // Compare attribute values directly instead of interpolating the stored
      // string into a selector: a stale or hand-edited value may contain
      // characters that make the selector invalid, or match no control at all.
      const matchingControl = Array.from(form.querySelectorAll('[value]')).find(
        (control) => control.getAttribute('value') === selectedTheme
      )
      if (matchingControl) {
        matchingControl.checked = true
      }
    }
  }
  // Media query text identifying the dark color-scheme blocks in the style sheets.
  const prefersColorSchemeDark = '(prefers-color-scheme: dark)'

  // Once the page has loaded, copy every rule nested in a top-level
  // `@media (prefers-color-scheme: dark)` block into its own style sheet as a
  // standalone rule, and show the theme selector only if at least one such
  // rule was found.
  window.addEventListener('load', () => {
    let hasDarkRules = false
    for (const styleSheet of Array.from(document.styleSheets)) {
      let sheetRules
      try {
        sheetRules = styleSheet.cssRules
      } catch (error) {
        // Reading the rules of a cross-origin style sheet throws a
        // SecurityError; such a sheet cannot be inspected, so skip it rather
        // than abort the whole handler.
        if (error.name === 'SecurityError') {
          continue
        }
        throw error
      }
      const mediaRules = []
      for (const cssRule of sheetRules) {
        if (cssRule.type !== CSSRule.MEDIA_RULE) {
          continue
        }
        // WARNING: some Safari versions do not support `conditionText`;
        // fall back to the media list text, which carries the same query.
        const condition = cssRule.conditionText || cssRule.media.mediaText
        if (condition !== prefersColorSchemeDark) {
          continue
        }
        mediaRules.push(...Array.from(cssRule.cssRules))
      }
      // WARNING: do not try to insert a rule into a style sheet you are
      // currently iterating on, otherwise the browser will be stuck
      // in an infinite loop…
      for (const mediaRule of mediaRules) {
        styleSheet.insertRule(mediaRule.cssText)
        hasDarkRules = true
      }
    }
    if (hasDarkRules) {
      loadThemeForm('#theme-selector')
    }
  })
  592. </script>
  593. </body>
  594. </html>