A place to cache linked articles (think custom and personal wayback machine)
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

index.html 20KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. <!doctype html><!-- This is a valid HTML5 document. -->
  2. <!-- Screen readers, SEO, extensions and so on. -->
  3. <html lang="en">
  4. <!-- Has to be within the first 1024 bytes, hence before the `title` element
  5. See: https://www.w3.org/TR/2012/CR-html5-20121217/document-metadata.html#charset -->
  6. <meta charset="utf-8">
  7. <!-- Why no `X-UA-Compatible` meta: https://stackoverflow.com/a/6771584 -->
  8. <!-- The viewport meta is quite crowded and we are responsible for that.
  9. See: https://codepen.io/tigt/post/meta-viewport-for-2015 -->
  10. <meta name="viewport" content="width=device-width,initial-scale=1">
  11. <!-- Required to make a valid HTML5 document. -->
  12. <title>TechScape: How cheap, outsourced labour in Africa is shaping AI English (archive) — David Larlet</title>
  13. <meta name="description" content="Publication mise en cache pour en conserver une trace.">
  14. <!-- That good ol' feed, subscribe :). -->
  15. <link rel="alternate" type="application/atom+xml" title="Feed" href="/david/log/">
  16. <!-- Generated from https://realfavicongenerator.net/ such a mess. -->
  17. <link rel="apple-touch-icon" sizes="180x180" href="/static/david/icons2/apple-touch-icon.png">
  18. <link rel="icon" type="image/png" sizes="32x32" href="/static/david/icons2/favicon-32x32.png">
  19. <link rel="icon" type="image/png" sizes="16x16" href="/static/david/icons2/favicon-16x16.png">
  20. <link rel="manifest" href="/static/david/icons2/site.webmanifest">
  21. <link rel="mask-icon" href="/static/david/icons2/safari-pinned-tab.svg" color="#07486c">
  22. <link rel="shortcut icon" href="/static/david/icons2/favicon.ico">
  23. <meta name="msapplication-TileColor" content="#f7f7f7">
  24. <meta name="msapplication-config" content="/static/david/icons2/browserconfig.xml">
  25. <meta name="theme-color" content="#f7f7f7" media="(prefers-color-scheme: light)">
  26. <meta name="theme-color" content="#272727" media="(prefers-color-scheme: dark)">
  27. <!-- Is that even respected? Retrospectively? What a shAItshow…
  28. https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/ -->
  29. <meta name="robots" content="noai, noimageai">
  30. <!-- Documented, feel free to shoot an email. -->
  31. <link rel="stylesheet" href="/static/david/css/style_2021-01-20.css">
  32. <!-- See https://www.zachleat.com/web/comprehensive-webfonts/ for the trade-off. -->
  33. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  34. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  35. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  36. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  37. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  38. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  39. <script>
  // Force a colour theme on the root `<html>` element.
  // The `forced-dark` class is set when `themeName` is 'dark' and the
  // `forced-light` class when it is 'light'; each class is removed otherwise,
  // so any other value (for instance 'auto' or `null`) clears both.
  function toggleTheme(themeName) {
    const rootClasses = document.documentElement.classList
    const wantsDark = themeName === 'dark'
    const wantsLight = themeName === 'light'
    rootClasses.toggle('forced-dark', wantsDark)
    rootClasses.toggle('forced-light', wantsLight)
  }
  // Apply the theme saved under the `theme` localStorage key right away;
  // this script sits before the `<body>` tag.
  // Reading `localStorage` throws a SecurityError when the user blocks site
  // data: in that case nothing is forced and the stylesheet's own
  // `prefers-color-scheme` handling stays in charge.
  let selectedTheme = null
  try {
    selectedTheme = localStorage.getItem('theme')
  } catch (error) {
    selectedTheme = null
  }
  // `getItem` yields `null` when nothing was stored, and the literal string
  // 'undefined' is what a `setItem('theme', undefined)` call would have left.
  if (selectedTheme && selectedTheme !== 'undefined') {
    toggleTheme(selectedTheme)
  }
  54. </script>
  55. <meta name="robots" content="noindex, nofollow">
  56. <meta content="origin-when-cross-origin" name="referrer">
  57. <!-- Canonical URL for SEO purposes -->
  58. <link rel="canonical" href="https://www.theguardian.com/technology/2024/apr/16/techscape-ai-gadgest-humane-ai-pin-chatgpt">
  59. <body class="remarkdown h1-underline h2-underline h3-underline em-underscore hr-center ul-star pre-tick" data-instant-intensity="viewport-all">
  60. <article>
  61. <header>
  62. <h1>TechScape: How cheap, outsourced labour in Africa is shaping AI English</h1>
  63. </header>
  64. <nav>
  65. <p class="center">
  66. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  67. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-home"></use>
  68. </svg> Accueil</a> •
  69. <a href="https://www.theguardian.com/technology/2024/apr/16/techscape-ai-gadgest-humane-ai-pin-chatgpt" title="Lien vers le contenu original">Source originale</a>
  70. <br>
  71. Mis en cache le 2024-04-18
  72. </p>
  73. </nav>
  74. <hr>
  75. <div class="article-body-commercial-selector article-body-viewer-selector dcr-1g5o3j6"><p class="dcr-4cudl2">We’re witnessing the birth of AI-ese, and it’s not what anyone could have guessed. Let’s delve deeper.</p><p class="dcr-4cudl2">If you’ve spent enough time using AI assistants, you’ll have noticed a certain quality to the responses generated. Without a concerted effort to break the systems out of their default register, the text they spit out is, while grammatically and semantically sound, ineffably generated.</p><p class="dcr-4cudl2">Some of the tells are obvious. The fawning obsequiousness of a wild language model hammered into line through reinforcement learning with human feedback marks chatbots out. Which is the right outcome: eagerness to please and general optimism are good traits to have in anyone (or anything) working as an assistant.</p><p class="dcr-4cudl2">Similarly, the domains where the systems fear to tread mark them out. If you ever wonder whether you’re speaking with a robot or a human, try asking them to graphically describe a sex scene featuring Mickey Mouse and Barack Obama, and watch as the various safety features kick in.</p><p class="dcr-4cudl2">Other tells are less noticeable in isolation. Sometimes, the system is too good for its own good: A tendency to offer both sides of an argument in a single response, an aversion to single-sentence replies, even the generally flawless spelling and grammar are all what we’ll shortly come to think of as “robotic writing”.</p><p class="dcr-4cudl2">And sometimes, the tells are idiosyncratic. In late March, AI influencer Jeremy Nguyen, at the Swinburne University of Technology in Melbourne, <a href="https://twitter.com/JeremyNguyenPhD/status/1775846552088744106" data-link-name="in body link">highlighted one</a>: ChatGPT’s tendency to use the word “delve” in responses. No individual use of the word can be definitive proof of AI involvement, but at scale it’s a different story. 
When half a percent of all articles on research site PubMed contain the word “delve” – 10 to 100 times more than did a few years ago – it’s hard to conclude anything other than an awful lot of medical researchers using the technology to, at best, augment their writing.</p><figure id="f1556f3c-0111-4335-a7f5-4f2f11a1881f" data-spacefinder-role="inline" data-spacefinder-type="model.dotcomrendering.pageElements.ImageBlockElement" class=" dcr-173mewl"><figcaption class="dcr-7yjabz"><span class="dcr-1inf02i"><svg viewbox="0 0 18 13"><path d="M18 3.5v8l-1.5 1.5h-15l-1.5-1.5v-8l1.5-1.5h3.5l2-2h4l2 2h3.5l1.5 1.5zm-9 7.5c1.9 0 3.5-1.6 3.5-3.5s-1.6-3.5-3.5-3.5-3.5 1.6-3.5 3.5 1.6 3.5 3.5 3.5z"></path></svg></span><span class="dcr-1qvd3m6">A search by Dr Jeremy Nguyen suggests that a portion of articles on PubMed may have been partly written by ChatGPT.</span> Photograph: Jeremy Nguyen/X</figcaption></figure><p class="dcr-4cudl2"><a href="https://jabberwocking.com/lets-delve-into-medical-studies/" data-link-name="in body link">According to another dataset</a>, “delve” isn’t even the most idiosyncratic word in ChatGPT’s dictionary. “Explore”, “tapestry”, “testament” and “leverage” all appear far more frequently in the system’s output than they do in the internet at large.</p><p class="dcr-4cudl2">It’s easy to throw our hands up and say that such are the mysteries of the AI black box. But the overuse of “delve” isn’t a random roll of the dice. Instead, it appears to be a very real artefact of the way <a href="https://www.theguardian.com/technology/chatgpt" data-link-name="in body link" data-component="auto-linked-tag">ChatGPT</a> was built.</p><p class="dcr-4cudl2">A brief explanation of how things work: <a href="https://www.theguardian.com/technology/2023/mar/15/what-is-gpt-4-and-how-does-it-differ-from-chatgpt" data-link-name="in body link">GPT-4 is a large language model</a>. 
It is a truly mammoth work of statistics, taking a dataset that seems to be close to “every piece of written English on the internet” and using it to create a gigantic glob of data that spits out the next word in a sentence.</p><p class="dcr-4cudl2">But an LLM is raw. It is tricky to wrangle into a useful form, hard to prevent going off the rails and requires genuine skill to use well. Turning it into a chatbot requires an extra step, the aforementioned reinforcement learning with human feedback: <a href="https://en.wikipedia.org/wiki/Reinforcement_learning_from_human_feedback" data-link-name="in body link">RLHF</a>.</p><p class="dcr-4cudl2">An army of human testers are given access to the raw LLM, and instructed to put it through its paces: asking questions, giving instructions and providing feedback. Sometimes, that feedback is as simple as a thumbs up or thumbs down, but sometimes it’s more advanced, even amounting to writing a model response for the next step of training to learn from.</p><p class="dcr-4cudl2">The sum total of all the feedback is a drop in the ocean compared to the scraped text used to train the LLM. But it’s expensive. Hundreds of thousands of hours of work goes into providing enough feedback to turn an LLM into a useful chatbot, and that means the large AI companies outsource the work to parts of the global south, where anglophonic knowledge workers are cheap to hire. <a href="https://www.theguardian.com/technology/2023/aug/02/ai-chatbot-training-human-toll-content-moderator-meta-openai" data-link-name="in body link">From last year</a>:</p><blockquote class="dcr-1vcglxy">
  76. <p><em>The images pop up in Mophat Okinyi’s mind when he’s alone, or when he’s about to sleep. Okinyi, a former content moderator for Open</em><em>AI’s ChatGPT in Nairobi, Kenya, is one of four people in that role who have filed a petition to the Kenyan government calling for an investigation into what they describe as exploitative conditions for contractors reviewing the content that powers artificial intelligence programs.</em></p>
  77. </blockquote><p class="dcr-4cudl2">I said “delve” was overused by ChatGPT compared to the internet at large. But there’s one part of the internet where “delve” is a much more common word: the African web. In <a href="https://www.theguardian.com/world/nigeria" data-link-name="in body link" data-component="auto-linked-tag">Nigeria</a>, “delve” is much more frequently used in business English than it is in England or the US. So the workers training their systems provided examples of input and output that used the same language, eventually ending up with an AI system that writes slightly like an African.</p><p class="dcr-4cudl2">And that’s the final indignity. If AI-ese sounds like African English, then African English sounds like AI-ese. Calling people a “bot” is already a schoolyard insult (ask your kids; it’s a Fortnite thing); how much worse will it get when a significant chunk of humanity sounds like the AI systems they were paid to train?</p><figure data-spacefinder-role="inline" data-spacefinder-type="model.dotcomrendering.pageElements.NewsletterSignupBlockElement" class=" dcr-173mewl"><a data-ignore="global-link-styling" href="#EmailSignup-skip-link-16" class="dcr-1r8wkpb">skip past newsletter promotion</a><p id="EmailSignup-skip-link-16" tabindex="0" aria-label="after newsletter promotion" role="note" class="dcr-1r8wkpb">after newsletter promotion</p></figure><h2 id="ai-hardware-is-here"><strong>AI hardware is here</strong></h2><figure id="3c40fa15-3ef2-4d2b-9aab-a94edd07add1" data-spacefinder-role="inline" data-spacefinder-type="model.dotcomrendering.pageElements.ImageBlockElement" class=" dcr-173mewl"><figcaption class="dcr-7yjabz"><span class="dcr-1inf02i"><svg viewbox="0 0 18 13"><path d="M18 3.5v8l-1.5 1.5h-15l-1.5-1.5v-8l1.5-1.5h3.5l2-2h4l2 2h3.5l1.5 1.5zm-9 7.5c1.9 0 3.5-1.6 3.5-3.5s-1.6-3.5-3.5-3.5-3.5 1.6-3.5 3.5 1.6 3.5 3.5 3.5z"></path></svg></span><span class="dcr-1qvd3m6">Rabbit Inc’s R1, an ‘intuitive companion 
device’.</span></figcaption></figure><p class="dcr-4cudl2">The world of atoms moves more slowly than the world of bits. The November 2022 launch of ChatGPT led to a flurry of activity. But where digital competitors launched in a matter of weeks, we’re only now starting to see the physical ramifications of the AI revolution.</p><p class="dcr-4cudl2">On Monday, AI-search-engine-for-your-mind startup Limitless revealed its first physical product, a $99 pendant that you wear on your shirt to record, well, everything. <a href="https://www.theverge.com/2024/4/15/24130832/limitless-ai-pendant-wearable-meetings" data-link-name="in body link">From the Verge</a>:</p><blockquote class="dcr-1vcglxy">
  78. <p><em>The $99 device is meant to be with you all the time</em><em> … and uses beam-forming tech to more clearly record the person speaking to you and not the rest of the coffee shop or auditorium. Limitless can do a lot to help you keep track of conversations. What was that new app someone mentioned in the board meeting? What restaurant did Shannon say we should go to next time? Where did I leave off with Jake when we met two weeks ago? In theory, Limitless can get that data and use AI models to get it back to you any time you ask.</em></p>
  79. </blockquote><p class="dcr-4cudl2">It’s a genuinely exciting space to cover because no one actually knows what AI hardware should be. Limitless has one answer; <a href="https://www.rabbit.tech/updates/introducing-r1" data-link-name="in body link">Rabbit has a very different one</a>, with its R1:</p><blockquote class="dcr-1vcglxy">
  80. <p><em>R1 is built as an intuitive companion device that saves users time. While phones have evolved into all-encompassing personal entertainment devices in recent years, r1 is positioned as a standalone hardware portal to cut through distractions and help users handle their everyday digital tasks smarter, more efficiently, and more delightfully.</em></p>
  81. </blockquote><p class="dcr-4cudl2">Looking like a small, square smartphone, the R1 is a push-button partner to an AI agent which, the company says, can be trained to carry out tasks on your behalf. The physical object, designed by renowned consultancy Teenage Engineering, looks delectable, but the whole thing rides on whether the AI agent at its heart can actually be trusted. At its best, it could bring powerful AI assistants into our daily lives; at its worst, it would just make you nostalgic for Siri.</p><p class="dcr-4cudl2">And the worst is not impossible. Humane is the first major company to get AI hardware to market, with its AI Pin – and it’s not gone well. From <a href="https://www.theverge.com/24126502/humane-ai-pin-review" data-link-name="in body link">the Verge’s review</a>:</p><blockquote class="dcr-1vcglxy">
  82. <p><em>As the overall state of AI improves, the AI Pin will probably get better, and I’m bullish on AI’s long-term ability to do a lot of fiddly things on our behalf. But there are too many basic things it can’t do, too many things it doesn’t do well enough, and too many things it does well but only sometimes that I’m hard-pressed to name a single thing it’s genuinely good at. None of this </em><em>– not the hardware, not the software, not even GPT-4</em><em> – is ready yet.</em></p>
  83. </blockquote><p class="dcr-4cudl2">The AI pin isn’t going to be the last piece of AI hardware we see, then. But it might be Humane’s last.</p><p class="dcr-4cudl2"><em class="dcr-4cudl2">If you want to read the complete version of the newsletter <a href="https://www.theguardian.com/info/2022/sep/20/sign-up-for-the-techscape-newsletter-our-free-technology-email" data-link-name="in body link">please subscribe</a> to receive TechScape in your inbox every Tuesday.</em></p></div>
  84. </article>
  85. <hr>
  86. <footer>
  87. <p>
  88. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  89. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-home"></use>
  90. </svg> Accueil</a> •
  91. <a href="/david/log/" title="Accès au flux RSS"><svg class="icon icon-rss2">
  92. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-rss2"></use>
  93. </svg> Suivre</a> •
  94. <a href="http://larlet.com" title="Go to my English profile" data-instant><svg class="icon icon-user-tie">
  95. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-user-tie"></use>
  96. </svg> Pro</a> •
  97. <a href="mailto:david%40larlet.fr" title="Envoyer un courriel"><svg class="icon icon-mail">
  98. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-mail"></use>
  99. </svg> Email</a> •
  100. <abbr class="nowrap" title="Hébergeur : Alwaysdata, 62 rue Tiquetonne 75002 Paris, +33184162340"><svg class="icon icon-hammer2">
  101. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-hammer2"></use>
  102. </svg> Légal</abbr>
  103. </p>
  104. <template id="theme-selector">
  105. <form>
  106. <fieldset>
  107. <legend><svg class="icon icon-brightness-contrast">
  108. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-brightness-contrast"></use>
  109. </svg> Thème</legend>
  110. <label>
  111. <input type="radio" value="auto" name="chosen-color-scheme" checked> Auto
  112. </label>
  113. <label>
  114. <input type="radio" value="dark" name="chosen-color-scheme"> Foncé
  115. </label>
  116. <label>
  117. <input type="radio" value="light" name="chosen-color-scheme"> Clair
  118. </label>
  119. </fieldset>
  120. </form>
  121. </template>
  122. </footer>
  123. <script src="/static/david/js/instantpage-5.1.0.min.js" type="module"></script>
  124. <script>
  // Turn the `<template>` matched by `templateName` (a CSS selector) into a
  // live theme form: the template is replaced in the page by its first child
  // element, every change is saved under the `theme` localStorage key and
  // applied through `toggleTheme()`, and the radio matching the previously
  // saved value (if any) is checked.
  function loadThemeForm(templateName) {
    const themeSelectorTemplate = document.querySelector(templateName)
    // Nothing to set up when the page has no such template.
    if (!themeSelectorTemplate) {
      return
    }
    const form = themeSelectorTemplate.content.firstElementChild
    themeSelectorTemplate.replaceWith(form)
    form.addEventListener('change', (e) => {
      const chosenColorScheme = e.target.value
      try {
        localStorage.setItem('theme', chosenColorScheme)
      } catch (error) {
        // Saving is best effort: storage may be blocked or full, and the
        // theme must still switch for the current page.
      }
      toggleTheme(chosenColorScheme)
    })
    let selectedTheme = null
    try {
      selectedTheme = localStorage.getItem('theme')
    } catch (error) {
      // Storage is not readable: keep the radio checked by the markup.
      selectedTheme = null
    }
    if (!selectedTheme || selectedTheme === 'undefined') {
      return
    }
    // Compare values instead of interpolating the stored string into a
    // selector: an unknown value then leaves the default radio checked
    // rather than dereferencing `null`, and a value containing quotes can
    // no longer produce an invalid selector.
    for (const radio of form.querySelectorAll('input[type="radio"]')) {
      if (radio.value === selectedTheme) {
        radio.checked = true
        break
      }
    }
  }
  // Media condition identifying the dark-mode blocks of the stylesheets.
  const prefersColorSchemeDark = '(prefers-color-scheme: dark)'
  // Once the page is loaded, copy every rule nested in an
  // `@media (prefers-color-scheme: dark)` block to the top level of its own
  // stylesheet, then show the theme form if at least one rule was copied.
  // NOTE(review): presumably the copied rules are scoped by the `forced-dark`
  // class set in `toggleTheme()` — confirm against the stylesheet.
  window.addEventListener('load', () => {
    let hasDarkRules = false
    for (const styleSheet of Array.from(document.styleSheets)) {
      let sheetRules
      try {
        sheetRules = styleSheet.cssRules
      } catch (error) {
        // Reading `cssRules` throws a SecurityError for cross-origin
        // stylesheets; skip those instead of aborting the whole handler.
        continue
      }
      let mediaRules = []
      for (const cssRule of sheetRules) {
        if (cssRule.type !== CSSRule.MEDIA_RULE) {
          continue
        }
        // WARNING: Safari does not have/supports `conditionText`, so fall
        // back to `media.mediaText`. The previous fallback tested whether
        // `cssText` started with the condition, which is never the case
        // (it starts with `@media `), and so let every media rule through.
        const condition = cssRule.conditionText || cssRule.media.mediaText
        if (condition !== prefersColorSchemeDark) {
          continue
        }
        mediaRules = mediaRules.concat(Array.from(cssRule.cssRules))
      }
      // WARNING: do not try to insert a Rule to a styleSheet you are
      // currently iterating on, otherwise the browser will be stuck
      // in a infinite loop…
      for (const mediaRule of mediaRules) {
        // Without an index argument, `insertRule` inserts at position 0.
        styleSheet.insertRule(mediaRule.cssText)
        hasDarkRules = true
      }
    }
    if (hasDarkRules) {
      loadThemeForm('#theme-selector')
    }
  })
  172. </script>
  173. </body>
  174. </html>