12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872 |
- <!doctype html><!-- Standards-mode HTML5 document. -->
- <!-- Document language: consumed by screen readers, search engines, extensions, etc. -->
- <html lang="fr">
- <!-- The charset declaration must appear within the first 1024 bytes,
- which is why it comes before the <title>.
- See: https://www.w3.org/TR/2012/CR-html5-20121217/document-metadata.html#charset -->
- <meta charset="utf-8">
- <!-- No `X-UA-Compatible` meta on purpose: https://stackoverflow.com/a/6771584 -->
- <!-- The viewport value is deliberately verbose, rationale here:
- https://codepen.io/tigt/post/meta-viewport-for-2015 -->
- <meta name="viewport" content="width=device-width,minimum-scale=1,initial-scale=1,shrink-to-fit=no">
- <!-- A <title> is mandatory for the document to validate. -->
- <title>Forget privacy: you're terrible at targeting anyway (archive) — David Larlet</title>
- <!-- Icon set and related metas produced by https://realfavicongenerator.net/ -->
- <link rel="apple-touch-icon" href="/static/david/icons/apple-touch-icon.png" sizes="180x180">
- <link rel="icon" href="/static/david/icons/favicon-32x32.png" type="image/png" sizes="32x32">
- <link rel="icon" href="/static/david/icons/favicon-16x16.png" type="image/png" sizes="16x16">
- <link rel="manifest" href="/manifest.json">
- <link rel="mask-icon" href="/static/david/icons/safari-pinned-tab.svg" color="#5bbad5">
- <link rel="shortcut icon" href="/static/david/icons/favicon.ico">
- <meta name="apple-mobile-web-app-title" content="David Larlet">
- <meta name="application-name" content="David Larlet">
- <meta name="msapplication-TileColor" content="#da532c">
- <meta name="msapplication-config" content="/static/david/icons/browserconfig.xml">
- <meta name="theme-color" content="#f0f0ea">
- <!-- Atom feed of the site, for subscribers. -->
- <link rel="alternate" href="/david/log/" type="application/atom+xml" title="Feed">
-
- <!-- Archived copy: kept out of search indexes, referrer limited to the origin on cross-origin requests. -->
- <meta name="robots" content="noindex, nofollow">
- <meta name="referrer" content="origin-when-cross-origin">
- <!-- The canonical URL points at the original publication of the archived content. -->
- <link rel="canonical" href="https://apenwarr.ca/log/20190201">
-
- <style>
- /* http://meyerweb.com/eric/tools/css/reset/
- Third-party reset: removes margin, padding and border from the listed
- elements and makes them inherit font settings from their parent. */
- html, body, div, span,
- h1, h2, h3, h4, h5, h6, p, blockquote, pre,
- a, abbr, address, big, cite, code,
- del, dfn, em, img, ins,
- small, strike, strong, tt, var,
- dl, dt, dd, ol, ul, li,
- fieldset, form, label, legend,
- table, caption, tbody, tfoot, thead, tr, th, td,
- article, aside, canvas, details, embed,
- figure, figcaption, footer, header, hgroup,
- menu, nav, output, ruby, section, summary,
- time, mark, audio, video {
- margin: 0;
- padding: 0;
- border: 0;
- font-size: 100%;
- font: inherit; /* shorthand: also resets weight, style and family to the inherited values */
- vertical-align: baseline;
- }
- /* HTML5 display-role reset for older browsers */
- article, aside, details, figcaption, figure,
- footer, header, hgroup, menu, nav, section { display: block; }
- body { line-height: 1; } /* tight default; text elements get their own line-height further down */
- blockquote, q { quotes: none; }
- blockquote:before, blockquote:after,
- q:before, q:after {
- content: ''; /* presumably a fallback for engines that do not understand `none` */
- content: none;
- }
- table {
- border-collapse: collapse;
- border-spacing: 0;
- }
-
- /* http://practicaltypography.com/equity.html */
- /* https://calendar.perfplanet.com/2016/no-font-face-bulletproof-syntax/ */
- /* https://www.filamentgroup.com/lab/js-web-fonts.html */
- /* EquityTextB, upright text face (registered at weight 300). */
- @font-face {
-   font-family: 'EquityTextB';
-   font-style: normal;
-   font-weight: 300;
-   font-display: swap;
-   src: url('/static/david/css/fonts/Equity-Text-B-Regular-webfont.woff2') format('woff2'),
-        url('/static/david/css/fonts/Equity-Text-B-Regular-webfont.woff') format('woff');
- }
- /* EquityTextB, italic text face (same weight as the upright one). */
- @font-face {
-   font-family: 'EquityTextB';
-   font-style: italic;
-   font-weight: 300;
-   font-display: swap;
-   src: url('/static/david/css/fonts/Equity-Text-B-Italic-webfont.woff2') format('woff2'),
-        url('/static/david/css/fonts/Equity-Text-B-Italic-webfont.woff') format('woff');
- }
- /* EquityTextB, bold upright face. */
- @font-face {
-   font-family: 'EquityTextB';
-   font-style: normal;
-   font-weight: 700;
-   font-display: swap;
-   src: url('/static/david/css/fonts/Equity-Text-B-Bold-webfont.woff2') format('woff2'),
-        url('/static/david/css/fonts/Equity-Text-B-Bold-webfont.woff') format('woff');
- }
-
- /* ConcourseT3, the face used for headings, gallery captions and the footer. */
- @font-face {
-   font-family: 'ConcourseT3';
-   font-style: normal;
-   font-weight: 300;
-   font-display: swap;
-   src: url('/static/david/css/fonts/concourse_t3_regular-webfont-20190806.woff2') format('woff2'),
-        url('/static/david/css/fonts/concourse_t3_regular-webfont-20190806.woff') format('woff');
- }
-
-
- /* http://practice.typekit.com/lesson/caring-about-opentype-features/
- Page-wide typography and colours, followed by inline text and nav styling. */
- body {
- /* http://www.cssfontstack.com/ Palatino 99% Win 86% Mac */
- font-family: "EquityTextB", Palatino, serif;
- background-color: #f0f0ea;
- color: #07486c;
- font-kerning: normal;
- -moz-osx-font-smoothing: grayscale;
- -webkit-font-smoothing: subpixel-antialiased;
- text-rendering: optimizeLegibility;
- font-variant-ligatures: common-ligatures contextual;
- font-feature-settings: "kern", "liga", "clig", "calt"; /* low-level OpenType tags for kerning and ligatures */
- }
- pre, code, kbd, samp, var, tt {
- font-family: 'TriplicateT4c', monospace; /* NOTE(review): no @font-face for 'TriplicateT4c' in this stylesheet; presumably a local font or the monospace fallback is used */
- }
- em { /* the reset sets `font: inherit`, so italics are restored explicitly */
- font-style: italic;
- color: #323a45;
- }
- strong { /* same for bold */
- font-weight: bold;
- color: black;
- }
- nav { /* dark bar whose children are spread horizontally */
- background-color: #323a45;
- color: #f0f0ea;
- display: flex;
- justify-content: space-around;
- padding: 1rem .5rem;
- }
- nav:last-child { /* only a nav that is the last child of its parent gets the bottom border */
- border-bottom: 1vh solid #2d7474;
- }
- nav a {
- color: #f0f0ea;
- }
- nav abbr {
- border-bottom: 1px dotted white;
- }
-
- h1 { /* full-width banner; its children are stacked vertically */
- border-top: 1vh solid #2d7474;
- border-bottom: .2vh dotted #2d7474;
- background-color: #e3e1e1;
- color: #323a45;
- text-align: center;
- padding: 5rem 0 4rem 0;
- width: 100%;
- font-family: 'ConcourseT3';
- display: flex;
- flex-direction: column;
- }
- h1.single { /* extra room at the bottom; `article.single` uses a negative top margin */
- padding-bottom: 10rem;
- }
- h1 span { /* taken out of the flow and pinned near the top of its containing block */
- position: absolute;
- top: 1vh;
- left: 20%;
- line-height: 0;
- }
- h1 span a { /* tab-shaped link: only the bottom corners are rounded */
- line-height: 1.7;
- padding: 1rem 1.2rem .6rem 1.2rem;
- border-radius: 0 0 6% 6%;
- background: #2d7474;
- font-size: 1.3rem;
- color: white;
- text-decoration: none;
- }
- h2 {
- margin: 4rem 0 1rem;
- border-top: .2vh solid #2d7474;
- padding-top: 1vh;
- }
- h3 {
- text-align: center;
- margin: 3rem 0 .75em;
- }
- hr { /* drawn as a small .4rem box with fully rounded corners, centred horizontally */
- height: .4rem;
- width: .4rem;
- border-radius: .4rem;
- background: #07486c;
- margin: 2.5rem auto;
- }
- /* <time> is laid out as a block, with no left margin whatever other rules set.
-    The value used to read `bloc`, which is not a valid `display` keyword, so
-    browsers discarded the whole declaration. */
- time {
-   display: block;
-   margin-left: 0 !important;
- }
- ul, ol { /* the reset removed list margins; restore breathing room */
- margin: 2rem;
- }
- ul {
- list-style-type: square;
- }
- a { /* underline rendering tweaks */
- text-decoration-skip-ink: auto;
- text-decoration-thickness: 0.05em;
- text-underline-offset: 0.09em;
- }
- article { /* centred reading column */
- max-width: 50rem;
- display: flex;
- flex-direction: column;
- margin: 2rem auto;
- }
- article.single { /* negative top margin pulls the article upwards */
- border-top: .2vh dotted #2d7474;
- margin: -6rem auto 1rem auto;
- background: #f0f0ea;
- padding: 2rem;
- }
- article p:last-child {
- margin-bottom: 1rem;
- }
- p {
- padding: 0 .5rem;
- margin-left: 3rem; /* changed again by the min-width media queries below */
- }
- p + p,
- figure + p { /* vertical rhythm between consecutive blocks */
- margin-top: 2rem;
- }
-
- blockquote { /* tinted box with an accent bar on the left; content centred */
- background-color: #e3e1e1;
- border-left: .5vw solid #2d7474;
- display: flex;
- flex-direction: column;
- align-items: center;
- padding: 1rem;
- margin: 1.5rem;
- }
- blockquote cite {
- font-style: italic;
- }
- blockquote p { /* cancels the generic paragraph indent */
- margin-left: 0;
- }
-
- figure {
- border-top: .2vh solid #2d7474;
- background-color: #e3e1e1;
- text-align: center;
- padding: 1.5rem 0;
- margin: 1rem 0 0;
- font-size: 1.5rem;
- width: 100%;
- }
- figure img { /* size cap; raised to 500px in the 50em media query below */
- max-width: 250px;
- max-height: 250px;
- border: .5vw solid #323a45;
- padding: 1px;
- }
- figcaption {
- padding: 1rem;
- line-height: 1.4;
- }
- aside {
- display: flex;
- flex-direction: column;
- background-color: #e3e1e1;
- padding: 1rem 0;
- border-bottom: .2vh solid #07486c;
- }
- aside p { /* same width as the article column, centred */
- max-width: 50rem;
- margin: 0 auto;
- }
-
- /* https://fvsch.com/code/css-locks/
-    Floor values of the fluid type scale, used below the 20em breakpoint.
-    Each font-size here must equal the starting value of the matching calc()
-    in the `min-width: 20em` block, otherwise the size jumps at the breakpoint. */
- p, li, pre, code, kbd, samp, var, tt, time, details, figcaption {
-   font-size: 1rem;
-   line-height: calc( 1.5em + 0.2 * 1rem );
- }
- h1 {
-   font-size: 1.9rem;
-   line-height: calc( 1.2em + 0.2 * 1rem );
- }
- h2 {
-   /* Was 1.6rem, while both media-query locks start h2 from 1.5rem. */
-   font-size: 1.5rem;
-   line-height: calc( 1.3em + 0.2 * 1rem );
- }
- h3 {
-   font-size: 1.35rem;
-   line-height: calc( 1.4em + 0.2 * 1rem );
- }
- @media (min-width: 20em) {
- /* The (100vw - 20rem) / (50 - 20) part
- resolves to 0-1rem, depending on the
- viewport width (between 20em and 50em).
- The line-height term runs the other way:
- (100vw - 50rem) / (20 - 50) goes from 1rem down to 0. */
- p, li, pre, code, kbd, samp, var, tt, time, details, figcaption {
- font-size: calc( 1rem + .6 * (100vw - 20rem) / (50 - 20) );
- line-height: calc( 1.5em + 0.2 * (100vw - 50rem) / (20 - 50) );
- margin-left: 0; /* drops the paragraph indent in this range; restored at 50em */
- }
- h1 {
- font-size: calc( 1.9rem + 1.5 * (100vw - 20rem) / (50 - 20) );
- line-height: calc( 1.2em + 0.2 * (100vw - 50rem) / (20 - 50) );
- }
- h2 {
- font-size: calc( 1.5rem + 1.5 * (100vw - 20rem) / (50 - 20) );
- line-height: calc( 1.3em + 0.2 * (100vw - 50rem) / (20 - 50) );
- }
- h3 {
- font-size: calc( 1.35rem + 1.5 * (100vw - 20rem) / (50 - 20) );
- line-height: calc( 1.4em + 0.2 * (100vw - 50rem) / (20 - 50) );
- }
- }
- @media (min-width: 50em) {
- /* The right part of the addition *must* be a
- rem value. In this example we *could* change
- the whole declaration to font-size:2.5rem,
- but if our baseline value was not expressed
- in rem we would have to use calc.
- These are the ceiling values of the fluid scale: sizes stop growing here. */
- p, li, pre, code, kbd, samp, var, tt, time, details, figcaption {
- font-size: calc( 1rem + .6 * 1rem );
- line-height: 1.5em;
- }
- p, li, pre, details { /* brings back the 3rem indent removed in the 20em block */
- margin-left: 3rem;
- }
- h1 {
- font-size: calc( 1.9rem + 1.5 * 1rem );
- line-height: 1.2em;
- }
- h2 {
- font-size: calc( 1.5rem + 1.5 * 1rem );
- line-height: 1.3em;
- }
- h3 {
- font-size: calc( 1.35rem + 1.5 * 1rem );
- line-height: 1.4em;
- }
- figure img { /* larger image cap on wide viewports */
- max-width: 500px;
- max-height: 500px;
- }
- }
-
- /* Variant of <figure>: extra space underneath, image height inherited from the figure. */
- figure.unsquared { margin-bottom: 1.5rem; }
- figure.unsquared img { height: inherit; }
-
-
-
- /* Print stylesheet: plain black-on-white links with their targets spelled
-    out, expanded abbreviations, small images, no navigation or asides. */
- @media print {
-   body { font-size: 100%; }
-   /* A URL cannot be followed on paper, so print it after the link text. */
-   a:after { content: " (" attr(href) ")"; }
-   a, a:link, a:visited, a:after {
-     text-decoration: underline;
-     text-shadow: none !important;
-     background-image: none !important;
-     background: white;
-     color: black;
-   }
-   /* Same idea for abbreviations: print the expansion instead of a hover hint. */
-   abbr[title] { border-bottom: 0; }
-   abbr[title]:after { content: " (" attr(title) ")"; }
-   img { page-break-inside: avoid; }
-   @page { margin: 2cm .5cm; }
-   h1, h2, h3 { page-break-after: avoid; }
-   /* The selector used to read `p3`, which matches no HTML element, so the
-      orphans/widows control never reached paragraphs. */
-   p { orphans: 3; widows: 3; }
-   img {
-     max-width: 250px !important;
-     max-height: 250px !important;
-   }
-   nav, aside { display: none; }
- }
-
- /* Multi-column lists. `with_columns` goes 1 / 2 / 3 columns and
-    `with_two_columns` goes 1 / 1 / 2 columns at the <20em / 20em / 50em steps.
-    Rule order is significant if both classes are set on the same list. */
- ul.with_columns { column-count: 1; }
- @media (min-width: 20em) {
-   ul.with_columns { column-count: 2; }
- }
- @media (min-width: 50em) {
-   ul.with_columns { column-count: 3; }
- }
- ul.with_two_columns { column-count: 1; }
- @media (min-width: 20em) {
-   ul.with_two_columns { column-count: 1; }
- }
- @media (min-width: 50em) {
-   ul.with_two_columns { column-count: 2; }
- }
-
- /* Gallery: figures flow in wrapping rows with space distributed around them. */
- .gallery {
-   display: flex;
-   justify-content: space-around;
-   flex-wrap: wrap;
- }
- /* Horizontal gutter around each picture. */
- .gallery figure img {
-   margin-right: 1rem;
-   margin-left: 1rem;
- }
- /* Captions use the heading face. */
- .gallery figure figcaption {
-   font-family: 'ConcourseT3';
- }
-
- footer { /* dark full-width band; children are stacked vertically */
- font-family: 'ConcourseT3';
- display: flex;
- flex-direction: column;
- border-top: 3px solid white;
- padding: 4rem 0;
- background-color: #07486c;
- color: white;
- }
- footer > * { /* direct children are capped at the article column width and centred */
- max-width: 50rem;
- margin: 0 auto;
- }
- footer a {
- color: #f1c40f;
- }
- footer .avatar { /* round, left-floated element; text wraps along the circle */
- width: 200px;
- height: 200px;
- border-radius: 50%;
- float: left;
- -webkit-shape-outside: circle();
- shape-outside: circle();
- margin-right: 2rem;
- padding: 2px 5px 5px 2px;
- background: white;
- border-left: 1px solid #f1c40f; /* thin top/left, thick bottom/right borders */
- border-top: 1px solid #f1c40f;
- border-right: 5px solid #f1c40f;
- border-bottom: 5px solid #f1c40f;
- }
- </style>
-
- <!-- The page chrome is French (see the lang attribute on <html>) but the archived
- title is English. Each language change is marked so that assistive technology
- switches pronunciation; no element is added, so the `h1 span` and `time` styles
- keep applying unchanged. -->
- <h1 lang="en">
- <span lang="fr"><a id="jumper" href="#jumpto" title="Un peu perdu ?">?</a></span>
- Forget privacy: you're terrible at targeting anyway (archive)
- <time lang="fr">Pour la pérennité des contenus liés. Non-indexé, retrait sur simple email.</time>
- </h1>
- <section>
- <article>
- <h3><a href="https://apenwarr.ca/log/20190201">Source originale du contenu</a></h3>
- <p><b>Forget privacy: you're terrible at targeting anyway</b></p>
-
- <p>I don't mind letting your programs see my private data as long as I get
- something useful in exchange. But that's not what happens.</p>
-
- <p>A former co-worker told me once: "Everyone loves collecting data,
- but nobody loves analyzing it later." This claim is almost shocking, but
- people who have been involved in data collection and analysis have all seen
- it. It starts with a brilliant idea: we'll collect information about
- every click someone makes on every page in our app! And we'll track how
- long they hesitate over a particular choice! And how often they use the
- back button! How many seconds they watch our intro video before they abort!
- How many times they reshare our social media post!</p>
-
- <p>And then they do track all that. Tracking it all is easy. Add some log
- events, dump them into a database, off we go.</p>
-
- <p>But then what? Well, after that, we have to analyze it. And as someone who
- has <a href="https://apenwarr.ca/log/20160328">analyzed</a> a <a href="https://apenwarr.ca/log/20171213">lot</a> of <a href="https://apenwarr.ca/log/20180918">data</a>
- about various things, let me tell you: being a data analyst is difficult
- and mostly unrewarding (except financially).</p>
-
- <p>See, the problem is there's almost no way to know if you're right. (It's
- also not clear what the definition of "right" is, which I'll get to in a bit.)
- There are almost never any easy conclusions, just hard ones, and the hard
- ones are error prone. What analysts don't talk about is how many incorrect
- charts (and therefore conclusions) get made on the way to making correct
- ones. Or ones we think are correct. A good chart is so incredibly
- persuasive that it almost doesn't even matter if it's right, as long as what
- you want is to persuade someone... which is probably why newspapers,
- magazines, and lobbyists publish so many misleading charts.</p>
-
- <p>But let's leave errors aside for the moment. Let's assume, very
- unrealistically, that we as a profession are good at analyzing things. What
- then?</p>
-
- <p>Well, then, let's get rich on targeted ads and personalized recommendation
- algorithms. It's what everyone else does!</p>
-
- <p>Or do they?</p>
-
- <p>The state of personalized recommendations is surprisingly terrible. At this
- point, the top recommendation is always a clickbait rage-creating
- article about movie stars or whatever Trump did or didn't do in the last 6
- hours. Or if not an article, then a video or documentary. That's not what I
- want to read or to watch, but I sometimes get sucked in anyway, and then
- it's recommendation apocalypse time, because the algorithm now thinks I
- <em>like</em> reading about Trump, and now <em>everything</em> is Trump. Never give
- positive feedback to an AI.</p>
-
- <p>This is, by the way, the dirty secret of the machine learning movement:
- almost everything produced by ML could have been produced, more cheaply,
- using a very dumb heuristic you coded up by hand, because mostly the ML is
- trained by feeding it examples of what humans did while following a very
- dumb heuristic. There's no magic here. If you use ML to teach a computer
- how to sort through resumes, it will recommend you interview people with
- male, white-sounding names, because it turns out that's <a href="https://www.reuters.com/article/us-amazon-com-jobs-automation-insight/amazon-scraps-secret-ai-recruiting-tool-that-showed-bias-against-women-idUSKCN1MK08G">what your HR
- department already
- does</a>.
- If you ask it what video a person like you wants to see next, it will
- recommend some political propaganda crap, because 50% of the time 90% of the
- people <em>do</em> watch that next, because they can't help themselves, and that's
- a pretty good success rate.</p>
-
- <p>(Side note: there really are some excellent uses of ML out there, for things
- traditional algorithms are bad at, like image processing or winning at
- strategy games. That's wonderful, but chances are good that <em>your</em> pet ML
- application is an expensive replacement for a dumb heuristic.)</p>
-
- <p>Someone who works on web search once told me that they already have an
- algorithm that guarantees the maximum click-through rate for any web search:
- just return a page full of porn links. (Someone else said you can reverse
- this to make a porn detector: any link which has a high click-through
- rate, regardless of which query it's answering, is probably porn.)</p>
-
- <p>Now, the thing is, legitimate-seeming businesses can't just give you porn
- links all the time, because that's Not Safe For Work, so the job of most
- modern recommendation algorithms is to return the closest thing to porn that
- is still Safe For Work. In other words, celebrities (ideally attractive
- ones, or at least controversial ones), or politics, or both. They walk that
- line as closely as they can, because that's the local maximum for their
- profitability. Sometimes they accidentally cross that line, and then have
- to apologize or pay a token fine, and then go back to what they were doing.</p>
-
- <p>This makes me sad, but okay, it's just math. And maybe human nature. And
- maybe capitalism. Whatever. I might not like it, but I understand it.</p>
-
- <p>My complaint is that none of the above had <em>anything</em> to do with hoarding
- my personal information.</p>
-
- <p><b>The hottest recommendations have nothing to do with me</b></p>
-
- <p>Let's be clear: the best targeted ads I will ever see are the ones I get from
- a search engine when it serves an ad for exactly the thing I was searching
- for. Everybody wins: I find what I wanted, the vendor helps me buy their
- thing, and the search engine gets paid for connecting us. I don't know
- anybody who complains about this sort of ad. It's a good ad.</p>
-
- <p>And it, too, had nothing to do with my personal information!</p>
-
- <p>Google was serving targeted search ads decades ago, before it ever occurred
- to them to ask me to log in. Even today you can still use every search
- engine web site without logging in. They all still serve ads targeted to
- your search keyword. It's an excellent business.</p>
-
- <p>There's another kind of ad that works well on me. I play video games
- sometimes, and I use Steam, and sometimes I browse through games on Steam
- and star the ones I'm considering buying. Later, when those games go on
- sale, Steam emails me to tell me they are on sale, and sometimes then I buy
- them. Again, everybody wins: I got a game I wanted (at a discount!), the
- game maker gets paid, and Steam gets paid for connecting us. And I can
- disable the emails if I want, but I don't want, because they are good ads.</p>
-
- <p>But nobody had to profile me to make that happen! Steam has my account, and
- I <em>told</em> it what games I wanted and then it sold me <em>those</em> games. That's
- not profiling, that's just remembering a list that I explicitly
- handed to you.</p>
-
- <p>Amazon shows a box that suggests I might want to re-buy certain kinds of
- consumable products that I've bought in the past. This is useful too, and
- requires no profiling other than remembering the transactions we've had with
- each other in the past, which they kinda have to do anyway. And again,
- everybody wins.</p>
-
- <p>Now, Amazon also recommends products <em>like</em> the ones I've bought before, or
- looked at before. That's, say, 20% useful. If I just bought a computer
- monitor, and you know I did because I bought it from you, then you might as
- well stop selling them to me. But for a few days after I buy any
- electronics they also keep offering to sell me USB cables, and
- they're probably right. So okay, 20% useful targeting is better than 0%
- useful. I give Amazon some credit for building a useful profile of me,
- although it's specifically a profile of stuff I did on <em>their</em> site and
- which they keep to themselves. That doesn't seem too invasive. Nobody is
- surprised that Amazon remembers what I bought or browsed on their
- site.</p>
-
- <p>Worse is when (non-Amazon) vendors get the idea that I might want something.
- (They get this idea because I visited their web site and looked at it.)
- So their advertising partner chases me around the web trying to sell me the
- same thing. They do that, even if I <em>already</em> bought it. Ironically, this
- is because of a half-hearted attempt to <em>protect</em> my privacy. The vendor
- doesn't give information about me or my transactions to their advertising
- partner (because there's an excellent chance it would land them in legal
- trouble eventually), so the advertising partner doesn't know that I bought
- it. All they know (because of the advertising partner's tracker gadget on
- the vendor's web site) is that I looked at it, so they keep advertising it
- to me just in case.</p>
-
- <p>But okay, now we're starting to get somewhere interesting. The advertiser
- has a tracker that it places on multiple sites and tracks me around. So it
- doesn't know what I bought, but it does know what I looked at, probably over
- a long period of time, across many sites.</p>
-
- <p>Using this information, its painstakingly trained AI makes conclusions about
- which other things I might want to look at, based on...</p>
-
- <p>...well, based on what? People similar to me? Things my Facebook friends
- like to look at? Some complicated matrix-driven formula humans can't
- possibly comprehend, but which is 10% better?</p>
-
- <p>Probably not. Probably what it does is infer my gender, age, income level,
- and marital status. After that, it sells me cars and gadgets if I'm a guy,
- and fashion if I'm a woman. Not because all guys like cars and gadgets, but
- because some very uncreative human got into the loop and said "please sell
- my car mostly to men" and "please sell my fashion items mostly to women."
- Maybe the AI infers the wrong demographic information (I know Google has
- mine wrong) but it doesn't really matter, because it's usually mostly right,
- which is better than 0% right, and advertisers get some mostly
- demographically targeted ads, which is better than 0% targeted ads.</p>
-
- <p>You <em>know</em> this is how it works, right? It has to be. You can infer it
- from how bad the ads are. Anyone can, in a few seconds, think of some stuff
- they really want to buy which The Algorithm has failed to offer them, all
- while Outbrain makes zillions of dollars sending links about car insurance
- to non-car-owning Manhattanites. It might as well be a 1990s late-night TV
- infomercial, where all they knew for sure about my demographic profile is
- that I was still awake.</p>
-
- <p>You tracked me everywhere I go, logging it forever, begging for someone to
- steal your database, desperately fearing that some new EU privacy regulation
- might destroy your business... for <em>this</em>?</p>
-
- <p><b>Statistical Astrology</b></p>
-
- <p>Of course, it's not really as simple as that. There is not just one
- advertising company tracking me across every web site I visit. There are...
- many advertising companies tracking me across every web site I visit. Some
- of them don't even do advertising, they just do tracking, and they sell that
- tracking data to advertisers who supposedly use it to do better targeting.</p>
-
- <p>This whole ecosystem is amazing. Let's look at online news web sites. Why
- do they load so slowly nowadays? Trackers. No, not ads - trackers. They
- only have a few ads, which mostly don't take that long to load. But they
- have a <em>lot</em> of trackers, because each tracker will pay them a tiny bit of
- money to be allowed to track each page view. If you're a giant publisher
- teetering on the edge of bankruptcy and you have 25 trackers on your web site
- already, but tracker company #26 calls you and says they'll pay you $50k a
- year if you add their tracker too, are you going to say no? Your page runs
- like sludge already, so making it 1/25th more sludgy won't change anything,
- but that $50k might.</p>
-
- <p>("Ad blockers" remove annoying ads, but they also speed up the web, mostly
- because they remove trackers. Embarrassingly, the trackers themselves don't
- even need to cause a slowdown, but they always do, because their developers
- are invariably idiots who each need to load thousands of lines of javascript
- to do what could be done in two. But that's another story.)</p>
-
- <p>Then the ad sellers, and ad networks, buy the tracking data from all the
- trackers. The more tracking data they have, the better they can target ads,
- right? I guess.</p>
-
- <p>The brilliant bit here is that each of the trackers has a bit of data about
- you, but not all of it, because not every tracker is on every web site. But
- on the other hand, cross-referencing individuals between trackers is kinda
- hard, because none of them wants to give away their secret sauce. So each
- ad seller tries their best to cross-reference the data from all the tracker
- data they buy, but it mostly doesn't work. Let's say there are 25 trackers
- each tracking a million users, probably with a ton of overlap. In a sane
- world we'd guess that there are, at most, a few million distinct users. But
- in an insane world where you can't <em>prove</em> if there's an overlap, it could be
- as many as 25 million distinct users! The more tracker data your ad network
- buys, the more information you have! Probably! And that means better
- targeting! Maybe! And so you should buy ads from our network instead of
- the other network with less data! I guess!</p>
-
- <p>None of this works. They are still trying to sell me car insurance for my
- subway ride.</p>
-
- <p><b>It's not just ads</b></p>
-
- <p>That's a lot about profiling for ad targeting, which obviously doesn't work,
- if anyone would just stop and look at it. But there are way too many people
- incentivized to believe otherwise. Meanwhile, if you care about your
- privacy, all that matters is they're still collecting your personal
- information whether it works or not.</p>
-
- <p>What about content recommendation algorithms though? Do those work?</p>
-
- <p>Obviously not. I mean, have you tried them. Seriously.</p>
-
- <p>That's not quite fair. There are a few things that work. <a href="https://www.theserverside.com/feature/How-Pandora-built-a-better-recommendation-engine">Pandora's
- music
- recommendations</a>
- are surprisingly good, but they are doing it in a very non-obvious way. The
- obvious way is to take the playlist of all the songs your users listen to,
- blast it all into an ML training dataset, and then use that to produce a new
- playlist for new users based on... uh... their... profile? Well, they
- don't have a profile yet because they just joined. Perhaps based on the
- first few songs they select manually? Maybe, but they probably started
- with either a really popular song, which tells you nothing, or a really
- obscure song to test the thoroughness of your library, which tells you less
- than nothing.</p>
-
- <p>(I'm pretty sure this is how Mixcloud works. After each mix, it tries to
- find the "most similar" mix to continue with. Usually this is someone
- else's upload of the exact same mix. Then the "most similar" mix to that
- one is the first one, so it does that. Great job, machine learning, keep it
- up.)</p>
-
- <p>That leads us to the "random song followed by thumbs up/down" system that
- everyone uses. But everyone sucks, except Pandora. Why? Apparently
- because Pandora spent a lot of time hand-coding a bunch of music
- characteristics and writing a "real algorithm" (as opposed to ML) that tries
- to generate playlists based on the right combinations of those
- characteristics.</p>
-
- <p>In that sense, Pandora isn't pure ML. It often converges on a playlist
- you'll like within one or two thumbs up/down operations, because you're
- navigating through a multidimensional interconnected network of songs that
- people encoded the hard way, not a massive matrix of mediocre playlists
- scraped from average people who put no effort into generating those
- playlists in the first place. Pandora is bad at a lot of things (especially
- "availability in Canada") but their music recommendations are top notch.</p>
-
- <p>Just one catch. If Pandora can figure out a good playlist based
- on a starter song and one or two thumbs up/down clicks, then... I guess it's
- not profiling you. They didn't need your personal information either.</p>
-
- <p><b>Netflix</b></p>
-
- <p>While we're here, I just want to rant about Netflix, which is an odd case
- of starting off with a really good recommendation algorithm
- and then making it worse on purpose.</p>
-
- <p>Once upon a time, there was the <a href="https://journals.sagepub.com/doi/full/10.1177/1461444814538646">Netflix
- prize</a>,
- which granted $1 million to the best team that could predict people's movie
- ratings, based on their past ratings, with better accuracy than Netflix
- could themselves. (This not-so-shockingly resulted in a <a href="https://www.cs.cornell.edu/~shmat/netflix-faq.html">privacy
- fiasco</a> when it turned
- out you could de-anonymize the data set that they publicly released, oops.
- Well, that's what you get when you long-term store people's personal
- information in a database.)</p>
-
- <p>Netflix believed their business depended on a good
- recommendation algorithm. It was already pretty good: I remember using
- Netflix around 10 years ago and getting several recommendations for things I
- would never have discovered, but which I turned out to like.
- That hasn't happened to me on Netflix in a long, long time.</p>
-
- <p>As the story goes, once upon a time Netflix was a DVD-by-mail service.
- DVD-by-mail is really slow, so it was absolutely essential that at least one
- of this week's DVDs was good enough to entertain you for
- your Friday night movie. Too many Fridays with only bad movies, and
- you'd surely unsubscribe. A good recommendation system was key. (I guess
- there was also some interesting math around trying to make sure to rent out
- as much of the inventory as possible each week, since having a zillion
- copies of the most recent blockbuster, which would be popular this month and
- then die out next month, was not really viable.)</p>
-
- <p>Eventually though, Netflix moved online, and the cost of a bad
- recommendation was much less: just stop watching and switch to a new movie.
- Moreover, it was perfectly fine if everyone watched the same blockbuster.
- In fact, it was better, because they could cache it at your ISP and caches
- always work better if people are boring and average.</p>
-
- <p>Worse, as the story goes, Netflix noticed a pattern: the more hours people
- watch, the less likely they are to cancel. (This makes sense: the more
- hours you spend on Netflix, the more you feel like you "need" it.) And with
- new people trying the service at a fixed or proportional rate, higher retention
- translates directly to faster growth.</p>
-
- <p>Hearing this story was also when I learned the word
- "<a href="https://en.wikipedia.org/wiki/Satisficing">satisficing</a>," which
- essentially means searching through sludge not for the best option, but for
- a good enough option. Nowadays Netflix isn't about finding the best movie,
- it's about satisficing. If it has the choice between an award-winning movie
- that you 80% might like or 20% might hate, and a mainstream movie that's 0%
- special but you 99% won't hate, it will recommend the second one every time.
- Outliers are bad for business.</p>
-
- <p>The thing is, you don't need a risky, privacy-invading profile to recommend
- a mainstream movie. Mainstream movies are specially designed to be
- inoffensive to just about everyone. My Netflix
- recommendations screen is no longer "Recommended for you," it's "New
- Releases," and then "Trending Now," and "Watch it again."</p>
-
- <p>As promised, Netflix paid out their $1 million prize to buy the winning
- recommendation algorithm, which was even better than their old one. But
- <a href="https://medium.com/netflix-techblog/netflix-recommendations-beyond-the-5-stars-part-1-55838468f429">they didn't use it</a>, they threw it away.</p>
-
- <p>Some very expensive A/B testers determined that this is what makes me watch
- the most hours of mindless TV. Their revenues keep going up. And they
- don't even need to invade my privacy to do it.</p>
-
- <p>Who am I to say they're wrong?</p>
- </article>
- </section>
-
-
- <nav id="jumpto">
- <p>
- <a href="/david/blog/">Accueil du blog</a> |
- <a href="https://apenwarr.ca/log/20190201">Source originale</a> |
- <a href="/david/stream/2019/">Accueil du flux</a>
- </p>
- </nav>
-
- <footer>
- <div>
- <img src="/static/david/david-larlet-avatar.jpg" alt="Avatar de David Larlet" loading="lazy" class="avatar" width="200" height="200">
- <p>
- Bonjour/Hi!
- Je suis <a href="/david/" title="Profil public">David Larlet</a>, je vis actuellement à Montréal et j’alimente cet espace depuis 15 ans. <br>
- Si tu as apprécié cette lecture, n’hésite pas à poursuivre ton exploration. Par exemple via les <a href="/david/blog/" title="Expériences bienveillantes">réflexions bimestrielles</a>, la <a href="/david/stream/2019/" title="Pensées (dés)articulées">veille hebdomadaire</a> ou en t’abonnant au <a href="/david/log/" title="S’abonner aux publications via RSS">flux RSS</a> (<a href="/david/blog/2019/flux-rss/" title="Tiens c’est quoi un flux RSS ?">so 2005</a>).
- </p>
- <p>
- Je m’intéresse à la place que je peux avoir dans ce monde. En tant qu’humain, en tant que membre d’une famille et en tant qu’associé d’une coopérative. De temps en temps, je fais aussi des <a href="https://github.com/davidbgk" title="Principalement sur Github mais aussi ailleurs">trucs techniques</a>. Et encore plus rarement, <a href="/david/talks/" title="En ce moment je laisse plutôt la place aux autres">j’en parle</a>.
- </p>
-
- <p>
- Voici quelques articles choisis :
- <a href="/david/blog/2019/faire-equipe/" title="Accéder à l’article complet">Faire équipe</a>,
- <a href="/david/blog/2018/bivouac-automnal/" title="Accéder à l’article complet">Bivouac automnal</a>,
- <a href="/david/blog/2018/commodite-effondrement/" title="Accéder à l’article complet">Commodité et effondrement</a>,
- <a href="/david/blog/2017/donnees-communs/" title="Accéder à l’article complet">Des données aux communs</a>,
- <a href="/david/blog/2016/accompagner-enfant/" title="Accéder à l’article complet">Accompagner un enfant</a>,
- <a href="/david/blog/2016/senior-developer/" title="Accéder à l’article complet">Senior developer</a>,
- <a href="/david/blog/2016/illusion-sociale/" title="Accéder à l’article complet">L’illusion sociale</a>,
- <a href="/david/blog/2016/instantane-scopyleft/" title="Accéder à l’article complet">Instantané Scopyleft</a>,
- <a href="/david/blog/2016/enseigner-web/" title="Accéder à l’article complet">Enseigner le Web</a>,
- <a href="/david/blog/2016/simplicite-defaut/" title="Accéder à l’article complet">Simplicité par défaut</a>,
- <a href="/david/blog/2016/minimalisme-esthetique/" title="Accéder à l’article complet">Minimalisme et esthétique</a>,
- <a href="/david/blog/2014/un-web-omni-present/" title="Accéder à l’article complet">Un web omni-présent</a>,
- <a href="/david/blog/2014/manifeste-developpeur/" title="Accéder à l’article complet">Manifeste de développeur</a>,
- <a href="/david/blog/2013/confort-convivialite/" title="Accéder à l’article complet">Confort et convivialité</a>,
- <a href="/david/blog/2013/testament-numerique/" title="Accéder à l’article complet">Testament numérique</a>,
- et <a href="/david/blog/" title="Accéder aux archives">bien d’autres…</a>
- </p>
- <p>
- On peut <a href="mailto:david%40larlet.fr" title="Envoyer un courriel">échanger par courriel</a>. Si éventuellement tu souhaites que l’on travaille ensemble, tu devrais commencer par consulter le <a href="http://larlet.com">profil dédié à mon activité professionnelle</a> et/ou contacter directement <a href="http://scopyleft.fr/">scopyleft</a>, la <abbr title="Société coopérative et participative">SCOP</abbr> dont je fais partie depuis six ans. Je recommande au préalable de lire <a href="/david/blog/2018/cout-site/" title="Attention ce qui va suivre peut vous choquer">combien coûte un site</a> et pourquoi je suis plutôt favorable à une <a href="/david/pro/devis/" title="Discutons-en !">non-demande de devis</a>.
- </p>
- <p>
- Je ne traque pas ta navigation mais mon
- <abbr title="Alwaysdata, 62 rue Tiquetonne 75002 Paris, +33.184162340">hébergeur</abbr>
- conserve des logs d’accès.
- </p>
- </div>
- </footer>
- <script type="text/javascript">
- ;(_ => {
- const jumper = document.getElementById('jumper')
- jumper.addEventListener('click', e => {
- e.preventDefault()
- const anchor = e.target.getAttribute('href')
- const targetEl = document.getElementById(anchor.substring(1))
- targetEl.scrollIntoView({behavior: 'smooth'})
- })
- })()
- </script>
|