A place to cache linked articles (think custom and personal wayback machine)
選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

index.html 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. <!doctype html><!-- This is a valid HTML5 document. -->
  2. <!-- Screen readers, SEO, extensions and so on. -->
  3. <html lang="fr">
  4. <!-- Has to be within the first 1024 bytes, hence before the `title` element
  5. See: https://www.w3.org/TR/2012/CR-html5-20121217/document-metadata.html#charset -->
  6. <meta charset="utf-8">
  7. <!-- Why no `X-UA-Compatible` meta: https://stackoverflow.com/a/6771584 -->
  8. <!-- The viewport meta is quite crowded and we are responsible for that.
  9. See: https://codepen.io/tigt/post/meta-viewport-for-2015 -->
  10. <meta name="viewport" content="width=device-width,initial-scale=1">
  11. <!-- Required to make a valid HTML5 document. -->
  12. <title>Automating podcast transcripts on my Mac with OpenAI Whisper (archive) — David Larlet</title>
  13. <meta name="description" content="Publication mise en cache pour en conserver une trace.">
  14. <!-- That good ol' feed, subscribe :). -->
  15. <link rel="alternate" type="application/atom+xml" title="Feed" href="/david/log/">
  16. <!-- Generated from https://realfavicongenerator.net/ such a mess. -->
  17. <link rel="apple-touch-icon" sizes="180x180" href="/static/david/icons2/apple-touch-icon.png">
  18. <link rel="icon" type="image/png" sizes="32x32" href="/static/david/icons2/favicon-32x32.png">
  19. <link rel="icon" type="image/png" sizes="16x16" href="/static/david/icons2/favicon-16x16.png">
  20. <link rel="manifest" href="/static/david/icons2/site.webmanifest">
  21. <link rel="mask-icon" href="/static/david/icons2/safari-pinned-tab.svg" color="#07486c">
  22. <link rel="shortcut icon" href="/static/david/icons2/favicon.ico">
  23. <meta name="msapplication-TileColor" content="#f7f7f7">
  24. <meta name="msapplication-config" content="/static/david/icons2/browserconfig.xml">
  25. <meta name="theme-color" content="#f7f7f7" media="(prefers-color-scheme: light)">
  26. <meta name="theme-color" content="#272727" media="(prefers-color-scheme: dark)">
  27. <!-- Is that even respected? Retrospectively? What a shAItshow…
  28. https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/ -->
  29. <meta name="robots" content="noai, noimageai">
  30. <!-- Documented, feel free to shoot an email. -->
  31. <link rel="stylesheet" href="/static/david/css/style_2021-01-20.css">
  32. <!-- See https://www.zachleat.com/web/comprehensive-webfonts/ for the trade-off. -->
  33. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  34. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  35. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  36. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  37. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  38. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  39. <script>
  /**
   * Force, or stop forcing, a color scheme via classes on the root element.
   *
   * `forced-dark` is set only when `themeName` is `'dark'` and `forced-light`
   * only when it is `'light'`; any other value clears both classes.
   *
   * @param {?string} themeName - Requested theme name.
   */
  function toggleTheme(themeName) {
    const rootClasses = document.documentElement.classList
    const forcedClassByTheme = [
      ['dark', 'forced-dark'],
      ['light', 'forced-light'],
    ]
    for (const [theme, forcedClass] of forcedClassByTheme) {
      // The second argument makes `toggle` an explicit add/remove.
      rootClasses.toggle(forcedClass, themeName === theme)
    }
  }
  // Apply the persisted theme from this inline head script.
  // Reading `localStorage` can throw (e.g. a SecurityError when the visitor
  // blocks site data); in that case we keep the automatic color scheme
  // instead of aborting the script.
  let selectedTheme = null
  try {
    selectedTheme = localStorage.getItem('theme')
  } catch (error) {
    // Storage is unavailable: deliberately fall back to the default theme.
    selectedTheme = null
  }
  // `getItem` returns `null` for a missing key; the 'undefined' string is
  // still rejected in case such a value was persisted earlier.
  if (selectedTheme !== null && selectedTheme !== 'undefined') {
    toggleTheme(selectedTheme)
  }
  54. </script>
  55. <meta name="robots" content="noindex, nofollow">
  56. <meta content="origin-when-cross-origin" name="referrer">
  57. <!-- Canonical URL for SEO purposes -->
  58. <link rel="canonical" href="https://sixcolors.com/post/2023/02/automating-podcast-transcripts-on-my-mac-with-openai-whisper/">
  59. <body class="remarkdown h1-underline h2-underline h3-underline em-underscore hr-center ul-star pre-tick" data-instant-intensity="viewport-all">
  60. <article>
  61. <header>
  62. <h1>Automating podcast transcripts on my Mac with OpenAI Whisper</h1>
  63. </header>
  64. <nav>
  65. <p class="center">
  66. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  67. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-home"></use>
  68. </svg> Accueil</a> •
  69. <a href="https://sixcolors.com/post/2023/02/automating-podcast-transcripts-on-my-mac-with-openai-whisper/" title="Lien vers le contenu original">Source originale</a>
  70. </p>
  71. </nav>
  72. <hr>
  73. <figure><img decoding="async" src="https://i0.wp.com/sixcolors.com/wp-content/uploads/2023/02/psychic-notepad-6c.png?ssl=1" alt="a demo podcast transcript" data-image-w="" data-image-h="" class=" jetpack-broken-image" data-recalc-dims="1"><figcaption>A little section of Upgrade 444 in David Smith’s original Podsearch engine.</figcaption></figure>
  74. <p>A while ago, David Smith created a site called <a href="http://podsearch.david-smith.org">Podsearch</a>, a search engine for a few of his favorite podcasts, including a couple of mine. That project went by the wayside after a while, and I found myself getting frustrated during episodes of <a href="https://relay.fm/upgrade">Upgrade</a> that I couldn’t refer people back to specific episodes where we had already discussed a topic.</p>
  75. <p>About the same time, I began reading about <a href="https://openai.com/blog/whisper/">OpenAI Whisper</a>, an automatic speech recognition system that “approaches human level robustness and accuracy” for converting the spoken word into written text. Up until then, I’d been doing speech-to-text—most notably, for my <a href="https://sixcolors.com/tag/transcripts/">transcripts of Apple results calls</a> using various services (Trint, Rev) that charge by the minute.</p>
  76. <p>Whisper’s free, and you can run it on your own computer. I thought that I might give Whisper a go in transcribing Upgrade—or at least recent episodes of Upgrade, maybe since episode 400—for my own reference.</p>
  77. <p>I rapidly discovered that while the <a href="https://github.com/openai/whisper">python implementation of Whisper</a> would run on my Mac, it ran at about 0.5x speed—so a two-hour podcast would take four hours to transcribe. Not great. Still, the results were promising. Here’s <a href="https://sixcolors.com/post/2017/03/the-dream-of-converting-podcasts-into-text/">the state of the art</a> of podcast transcription circa 2017:</p>
  78. <blockquote><p>
  79. Alright we’re going to wrap it up that this ends this edition of our red chickens with Batman that are affiliated with like extension cords for Batman University I’d like to think my gas for being here and watching some Batman movies with me… and told her I think you were the king of the Wicker people. Goodnight everybody for listening to be uncomfortable I’ve been your Hostess and smell but really I Batman.
  80. </p></blockquote>
  81. <p>And here’s how Whisper fared:</p>
  82. <blockquote><p>
  83. All right, we’re gonna wrap it up. This ends this edition of our check-ins with Batman that are affiliated. It’s like extension course for Batman University. I’d like to thank my guests for being here and watching some Batman movies with me…. And Tony Sindelar, I think you were the king of the Wicker people. Goodbye nerds. And thanks everybody for listening to The Incomparable. I’ve been your host Jason Snell. But really, I’m Batman. Hmm.
  84. </p></blockquote>
  85. <p>While not perfect, Whisper was <em>staggeringly</em> better than the 2017 transcript and really, much better than any other AI-driven transcription I’d tried recently. It got the punctuation. It got proper names. And it didn’t turn “Thanks for listening to The Incomparable, I’ve been your host Jason Snell” into “Goodnight everybody for listening to be uncomfortable, I’ve been your Hostess and smell.”</p>
  86. <p>Fortunately, a fellow named Georgi Gerganov made a <a href="https://github.com/ggerganov/whisper.cpp">C++-native port of Whisper</a> that is easy to install and run on macOS and is optimized for Apple silicon. I downloaded and installed Gerganov’s version, downloaded the medium English model, and discovered that it could transcribe a podcast at rates up to 2x!</p>
  87. <p>This was great, but the last thing I needed was to have to remember all the arcane command-line commands required to get the files in the right place. So instead, I wrote <a href="https://www.icloud.com/shortcuts/10daa20be4774b629a04e214416ed3e2">The Transcriptor</a>, a Shortcut that lets me control-click on audio files and turn them into transcripts in a format of my choice. (I also pointed Whisper at an episode of Total Party Kill and it <a href="https://www.youtube.com/watch?v=tzg_QhehQKs">made a remarkably good subtitle track</a> ready for uploading to YouTube.)</p>
  88. <figure><img decoding="async" src="https://i0.wp.com/sixcolors.com/wp-content/uploads/2023/02/whisper-shortcut-6c.png?ssl=1" alt="shortcut action block" data-image-w="" data-image-h="" class=" jetpack-broken-image" data-recalc-dims="1"><figcaption>Who needs to remember all this stuff?</figcaption></figure>
  89. <p>Along the way I mentioned what I was doing to David Smith, who sent me his code for PodSearch so I could use it to generate my Upgrade archive. This apparently turned David on to Whisper and he’s since <a href="http://podsearch.david-smith.org">revived the site</a> with Whisper-derived transcripts of seven podcasts, including Upgrade.</p>
  90. <p>Then last week, Apple’s financial results came out. Rather than using <a href="https://www.rev.com">Rev</a>, which I had been using to generate and correct transcripts the past few years, I decided to use Whisper and The Transcriptor to do the job.</p>
  91. <p>Other than a few hiccups involving using separate tools to record, transcribe, edit, and play back audio—I need to figure out a more complete workflow there—it worked <em>spectacularly</em> well. Over the years I’ve internalized all the Apple financial analyst call-specific phrases that the AI engine used by Rev would get wrong, which I’d need to correct. Almost all of them were rendered correctly by Whisper! I had to do less to get <a href="https://sixcolors.com/post/2023/02/this-is-tim-apple-q1-2023-analyst-call-transcript/">the transcript</a> in good shape than I ever have before.</p>
  92. <p>This is not to say that web apps like Rev aren’t always seeking better speech-to-text systems, and might even adopt Whisper themselves. And those services add other nice features—like the integration of audio playback and text editing—that definitely make editing a transcript easier than what I did. (I was editing in BBEdit and clicking into Overcast—playing back uploaded MP3 files at 1.5x speeds—when I needed to pause or back up.)</p>
  93. <p>Still… this is amazing. If I have learned anything from this journey, it’s that the ability to generate high-quality, readable transcripts from podcast audio is going to be here soon. It’s not quite here yet—Whisper has quirks that make it better for searchable transcripts than actual reading, and it doesn’t identify speakers—but it’s perilously close now.</p>
  94. <p>While reading a podcast transcript will never be the same as listening to the podcast, providing usable transcripts will make podcast content more accessible, searchable, and able to be referenced. It’s all just around the corner now.</p>
  95. </article>
  96. <hr>
  97. <footer>
  98. <p>
  99. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  100. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-home"></use>
  101. </svg> Accueil</a> •
  102. <a href="/david/log/" title="Accès au flux RSS"><svg class="icon icon-rss2">
  103. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-rss2"></use>
  104. </svg> Suivre</a> •
  105. <a href="http://larlet.com" title="Go to my English profile" data-instant><svg class="icon icon-user-tie">
  106. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-user-tie"></use>
  107. </svg> Pro</a> •
  108. <a href="mailto:david%40larlet.fr" title="Envoyer un courriel"><svg class="icon icon-mail">
  109. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-mail"></use>
  110. </svg> Email</a> •
  111. <abbr class="nowrap" title="Hébergeur : Alwaysdata, 62 rue Tiquetonne 75002 Paris, +33184162340"><svg class="icon icon-hammer2">
  112. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-hammer2"></use>
  113. </svg> Légal</abbr>
  114. </p>
  115. <template id="theme-selector">
  116. <form>
  117. <fieldset>
  118. <legend><svg class="icon icon-brightness-contrast">
  119. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-brightness-contrast"></use>
  120. </svg> Thème</legend>
  121. <label>
  122. <input type="radio" value="auto" name="chosen-color-scheme" checked> Auto
  123. </label>
  124. <label>
  125. <input type="radio" value="dark" name="chosen-color-scheme"> Foncé
  126. </label>
  127. <label>
  128. <input type="radio" value="light" name="chosen-color-scheme"> Clair
  129. </label>
  130. </fieldset>
  131. </form>
  132. </template>
  133. </footer>
  134. <script src="/static/david/js/instantpage-5.1.0.min.js" type="module"></script>
  135. <script>
  /**
   * Replace the `<template>` with its theme form and wire it up.
   *
   * On every `change` event the chosen value is persisted under the `theme`
   * key of `localStorage` and applied through `toggleTheme`. The control
   * matching the stored theme, if any, is checked.
   *
   * @param {string} templateName - CSS selector of the `<template>` element.
   */
  function loadThemeForm(templateName) {
    const themeSelectorTemplate = document.querySelector(templateName)
    const form = themeSelectorTemplate.content.firstElementChild
    themeSelectorTemplate.replaceWith(form)
    form.addEventListener('change', (e) => {
      const chosenColorScheme = e.target.value
      localStorage.setItem('theme', chosenColorScheme)
      toggleTheme(chosenColorScheme)
    })
    const selectedTheme = localStorage.getItem('theme')
    if (selectedTheme && selectedTheme !== 'undefined') {
      // Compare values rather than interpolating the stored string into a
      // selector: an unknown value would yield `null` (TypeError on
      // `.checked`) and a value containing a quote an invalid selector.
      const matchingInput = Array.from(form.querySelectorAll('[value]')).find(
        (input) => input.value === selectedTheme
      )
      if (matchingInput) {
        matchingInput.checked = true
      }
    }
  }
  const prefersColorSchemeDark = '(prefers-color-scheme: dark)'
  // Once the page is loaded, copy every rule nested in a
  // `@media (prefers-color-scheme: dark)` block to the top level of its
  // style sheet, then show the theme form if at least one rule was copied.
  window.addEventListener('load', () => {
    let hasDarkRules = false
    for (const styleSheet of Array.from(document.styleSheets)) {
      let sheetRules
      try {
        sheetRules = styleSheet.cssRules
      } catch (error) {
        // Reading the rules of a cross-origin style sheet throws a
        // SecurityError: skip that sheet instead of aborting the handler.
        if (error.name !== 'SecurityError') {
          throw error
        }
        continue
      }
      let mediaRules = []
      for (const cssRule of sheetRules) {
        if (cssRule.type !== CSSRule.MEDIA_RULE) {
          continue
        }
        // WARNING: Safari does not have/supports `conditionText`, so fall
        // back to the media list text. `cssText` cannot be used for the
        // comparison because it starts with `@media`.
        const condition = cssRule.conditionText || cssRule.media.mediaText
        if (condition !== prefersColorSchemeDark) {
          continue
        }
        mediaRules = mediaRules.concat(Array.from(cssRule.cssRules))
      }
      // WARNING: do not try to insert a Rule to a styleSheet you are
      // currently iterating on, otherwise the browser will be stuck
      // in an infinite loop…
      for (const mediaRule of mediaRules) {
        styleSheet.insertRule(mediaRule.cssText)
        hasDarkRules = true
      }
    }
    if (hasDarkRules) {
      loadThemeForm('#theme-selector')
    }
  })
  183. </script>
  184. </body>
  185. </html>