A place to cache linked articles (think custom and personal wayback machine)
選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

index.html 25KB


  1. <!doctype html><!-- This is a valid HTML5 document. -->
  2. <!-- Screen readers, SEO, extensions and so on. -->
  3. <html lang="en">
  4. <!-- Has to be within the first 1024 bytes, hence before the `title` element
  5. See: https://www.w3.org/TR/2012/CR-html5-20121217/document-metadata.html#charset -->
  6. <meta charset="utf-8">
  7. <!-- Why no `X-UA-Compatible` meta: https://stackoverflow.com/a/6771584 -->
  8. <!-- The viewport meta is quite crowded and we are responsible for that.
  9. See: https://codepen.io/tigt/post/meta-viewport-for-2015 -->
  10. <meta name="viewport" content="width=device-width,initial-scale=1">
  11. <!-- Required to make a valid HTML5 document. -->
  12. <title>Natural Language Geocoding (archive) — David Larlet</title>
  13. <meta name="description" content="Publication mise en cache pour en conserver une trace.">
  14. <!-- That good ol' feed, subscribe :). -->
  15. <link rel="alternate" type="application/atom+xml" title="Feed" href="/david/log/">
  16. <!-- Generated from https://realfavicongenerator.net/ such a mess. -->
  17. <link rel="apple-touch-icon" sizes="180x180" href="/static/david/icons2/apple-touch-icon.png">
  18. <link rel="icon" type="image/png" sizes="32x32" href="/static/david/icons2/favicon-32x32.png">
  19. <link rel="icon" type="image/png" sizes="16x16" href="/static/david/icons2/favicon-16x16.png">
  20. <link rel="manifest" href="/static/david/icons2/site.webmanifest">
  21. <link rel="mask-icon" href="/static/david/icons2/safari-pinned-tab.svg" color="#07486c">
  22. <link rel="shortcut icon" href="/static/david/icons2/favicon.ico">
  23. <meta name="msapplication-TileColor" content="#f7f7f7">
  24. <meta name="msapplication-config" content="/static/david/icons2/browserconfig.xml">
  25. <meta name="theme-color" content="#f7f7f7" media="(prefers-color-scheme: light)">
  26. <meta name="theme-color" content="#272727" media="(prefers-color-scheme: dark)">
  27. <!-- Is that even respected? Retrospectively? What a shAItshow…
  28. https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/ -->
  29. <meta name="robots" content="noai, noimageai">
  30. <!-- Documented, feel free to shoot an email. -->
  31. <link rel="stylesheet" href="/static/david/css/style_2021-01-20.css">
  32. <!-- See https://www.zachleat.com/web/comprehensive-webfonts/ for the trade-off. -->
  33. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  34. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  35. <link rel="preload" href="/static/david/css/fonts/triplicate_t4_poly_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)" crossorigin>
  36. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_regular.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  37. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_bold.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  38. <link rel="preload" href="/static/david/css/fonts/triplicate_t3_italic.woff2" as="font" type="font/woff2" media="(prefers-color-scheme: dark)" crossorigin>
  39. <script>
  40. function toggleTheme(themeName) {
  41. document.documentElement.classList.toggle(
  42. 'forced-dark',
  43. themeName === 'dark'
  44. )
  45. document.documentElement.classList.toggle(
  46. 'forced-light',
  47. themeName === 'light'
  48. )
  49. }
  50. const selectedTheme = localStorage.getItem('theme')
  51. if (selectedTheme !== 'undefined') {
  52. toggleTheme(selectedTheme)
  53. }
  54. </script>
  55. <meta name="robots" content="noindex, nofollow">
  56. <meta content="origin-when-cross-origin" name="referrer">
  57. <!-- Canonical URL for SEO purposes -->
  58. <link rel="canonical" href="https://www.element84.com/machine-learning/natural-language-geocoding/">
  59. <body class="remarkdown h1-underline h2-underline h3-underline em-underscore hr-center ul-star pre-tick" data-instant-intensity="viewport-all">
  60. <article>
  61. <header>
  62. <h1>Natural Language Geocoding</h1>
  63. </header>
  64. <nav>
  65. <p class="center">
  66. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  67. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-home"></use>
  68. </svg> Accueil</a> •
  69. <a href="https://www.element84.com/machine-learning/natural-language-geocoding/" title="Lien vers le contenu original">Source originale</a>
  70. <br>
  71. Mis en cache le 2024-05-20
  72. </p>
  73. </nav>
  74. <hr>
  75. <p>In our increasingly data-driven world, the ability to quickly and accurately access geospatial data can transform industries from public health to urban planning. Imagine being able to locate and analyze data from ‘within 10 miles of the North Carolina coastline’ in mere seconds. Natural Language Geocoding makes this possible, bridging the gap between complex geospatial queries and user-friendly language. Let’s dive into how this innovative technology is changing the landscape of data analysis, making it more accessible and efficient. </p>
  76. <h1 class="wp-block-heading">Balancing user needs when identifying geospatial regions </h1>
  77. <p>There are different design goals when allowing the user to specify spatial regions. </p>
  78. <ol>
  79. <li><strong>It should be as easy as possible to specify the spatial region.</strong> The user shouldn’t have to spend a long time entering coordinates or dragging points around. Dragging a bounding box is an example of something that’s relatively easy on a desktop computer.</li>
  80. <li><strong>The user should be able to express their intent precisely</strong>. This means that the most relevant search results will be found and relevant data analyzed. Uploading a shapefile to specify the exact coordinates is a way for the user to specify an exact area.</li>
  81. </ol>
  82. <p>Both of these goals are important but achieving one can sometimes come at the expense of the other. Bounding boxes are a relatively simple mechanic for users, but are not very precise if the region in question doesn’t fit within a boxy shape. Conversely, dragging points to outline a more precise area with a polygon is both tricky and time-consuming. Finally, uploading an exact spatial area has the potential to produce a highly accurate region, but it requires too much time spent finding geojson areas or converting between shapefiles, KML, and GeoJSON before an upload is even possible. </p>
  83. <figure class="wp-block-image aligncenter size-full is-resized"><figcaption class="wp-element-caption">A user-drawn bounding box around Florida; quick but imprecise.</figcaption></figure>
  84. <figure class="wp-block-image aligncenter size-full is-resized"><figcaption class="wp-element-caption">A user-drawn polygon around Florida; more precise but takes time.</figcaption></figure>
  85. <h1 class="wp-block-heading">Using Natural Language Processing (NLP) to interact with spatial data</h1>
  86. <p>With the above dilemma in mind, we’ve been considering the possibilities of implementing Natural Language Processing (NLP) as a strategy for enabling both precision and simplicity. What if the user could just describe the area they want and the system was smart enough to get just that?</p>
  87. <p>The newer advances in Natural Language Processing (NLP) from Large Language Models (LLM) combined with geospatial algorithms allow us to do just that. </p>
  88. <p>Here are some examples of that.</p>
  89. <figure class="wp-block-image aligncenter size-full is-resized"><figcaption class="wp-element-caption">Natural Language “Florida” to spatial area (easy and precise).</figcaption></figure>
  90. <figure class="wp-block-image aligncenter size-full is-resized"></figure>
  91. <figure class="wp-block-image aligncenter size-full is-resized"></figure>
  92. <h1 class="wp-block-heading">What is Natural Language Geocoding? </h1>
  93. <p>Natural Language Geocoding is a technology that allows users to interact with geospatial data systems using everyday language. This approach simplifies querying complex data sets without the need for specialized training in Geographic Information Systems (GIS).</p>
  94. <p>If you look at traditional Geocoding APIs they use conventional search approaches to convert region names like “Canada” or addresses “123 Main St. New York, NY” to points or polygons. Natural Language Geocoding goes beyond this traditional approach in a few ways:</p>
  95. <ol>
  96. <li>Users can specify region names, names of physical features, coastlines, etc.</li>
  97. <li>Users can specify multiple regions; they can include spatial operations in their descriptions like “Between Florida and Cuba” or “Within 10 miles of Washington DC”</li>
  98. <li>The end result of our approach generates a single Geometry, such as points, polygons, lines, etc, that precisely defines the area that was described by the user.</li>
  99. </ol>
  100. <h1 class="wp-block-heading">How would Natural Language Geocoding look in practice? </h1>
  101. <p>We see Natural Language Geocoding as a component of a larger AI/ML strategy. It could support many different use cases whenever a user needs to express a spatial area. We’ve highlighted a few examples below, but the possibilities extend far beyond the scope of this list. </p>
  102. <h2 class="wp-block-heading">Geospatial Data Discovery for Climate Change Analysis, Habitat Protection, and Scientific Research</h2>
  103. <p>Imagine a feature including Natural Language Geocoding in a tool such as <a href="https://search.earthdata.nasa.gov/search" target="_blank" rel="noreferrer noopener">NASA’s Earthdata Search</a>. Scientists and researchers want to quickly identify and obtain the data they need without downloading data outside of the boundaries of their area of study. With Natural Language Geocoding, they’d be able to specify specific areas like “within 10 miles of the North Carolina coastline” in only a fraction of the time required to identify such a specific region without this feature. </p>
  104. <p>Here’s an example of a tool that combines Image Embedding models with Natural Language Geocoding to allow searching for any features visible through satellite imagery. </p>
  105. <figure class="wp-block-image aligncenter size-full is-resized"><img src="/_gatsby/file/a4cb13ce9c447722fde717bd9580922c/image3-1.png?u=https%3A%2F%2Felement84.flywheelsites.com%2Fwp-content%2Fuploads%2F2024%2F05%2Fimage3-1.png" alt="Partial screenshot of the world map, the Natural Language Geocoding query selected is &amp;quot;Show me algal blooms within 2 miles of Cape Cod&amp;quot; and the portion of Massachusetts is appropriately outlined in a blue line. There is an image of the algal blooms tiled next to the map. " class="wp-image-25351 inline-gatsby-image-wrapper"></figure>
  106. <h2 class="wp-block-heading">Geospatial Data Discovery for Healthcare and Epidemiology</h2>
  107. <p>In healthcare, both time and accuracy are crucial. With access to this tool, public health officials and researchers can quickly identify spatial areas  related to disease outbreaks or healthcare access using natural language, streamlining the identification of high-risk areas and planning of healthcare services. For example, during an outbreak of dengue fever, public health officials can use Natural Language Geocoding to quickly locate all reported cases within ’50 miles of Rio de Janeiro city center’, allowing for rapid response and resource allocation.</p>
  108. <h2 class="wp-block-heading">Geospatial Data Discovery for Education</h2>
  109. <p>The introduction of Natural Language Geocoding makes geospatial concepts more accessible to students and educators. With this tool, real-world spatial data analysis is integrated into the classroom without the need for complex GIS software training.</p>
  110. <h2 class="wp-block-heading">Geospatial Data Discovery for Public Policy/Real Estate/Urban Planning</h2>
  111. <p>When it comes to policy and housing, through leveraging Natural Language Geocoding professionals become able to monitor urban growth, land use changes, and compliance with zoning laws, making governance more efficient and data-driven.</p>
  112. <p>Widespread adoption of Natural Language Geocoding also streamlines the analysis of land use, zoning, and urban development projects by allowing planners and real estate developers to query geospatial data repositories with natural language, making site selection and planning processes more efficient.</p>
  113. <h1 class="wp-block-heading">How does Natural Language Geocoding work? </h1>
  114. <h2 class="wp-block-heading">Spatial Operations in Natural Language Requests</h2>
  115. <p>Natural language requests can be understood as directed graphs. In order for users to identify particular regions, different combinations can be interpreted as spatial operations: </p>
  116. <ol>
  117. <li>In “California <strong>and</strong> Nevada”, the inclusion of “and” represents a union of two regions</li>
  118. <li>In “The Great Lakes <strong>in</strong> Canada”, the inclusion of “in” represents the Intersection of two regions</li>
  119. <li><strong>“Within 50 KM</strong> of Madrid” utilizes a buffer zone by including “within” and a specific KM range</li>
  120. </ol>
  121. <p>By combining several of the above operations such as in, “The Iberian Peninsula north of Barcelona and France west of Paris”, the relative complexity and specificity can be increased. </p>
  122. <p>These combinations can be thought of as a tree structure such as:</p>
  123. <ul>
  124. <li>Union
  125. <ul>
  126. <li>Directional Constraint
  127. <ul>
  128. <li>Named Location: “Iberian Peninsula”</li>
  129. <li>North
  130. <ul>
  131. <li>Named Location: Barcelona</li>
  132. </ul>
  133. </li>
  134. </ul>
  135. </li>
  136. <li>Directional Constraint
  137. <ul>
  138. <li>Named Location: “France”</li>
  139. <li>West<ul><li>Named Location: Paris</li></ul>
  140. </li>
  141. </ul>
  142. </li>
  143. </ul>
  144. </li>
  145. </ul>
  146. <h2 class="wp-block-heading">Large Language Models (LLMs) convert user intent into a tree structure</h2>
  147. <p>LLMs have improved dramatically when it comes to converting user requests into data structures, and can even produce valid programmatic representations like JSON. This is the same approach that’s used for Agents, which allow LLMs to use APIs based on user requests. We’ve been able to use general purpose LLMs, like Claude 3 Sonnet, with a system prompt to perform conversion from a user query into a tree structure.</p>
  148. <p>Our system prompt is divided into a few sections</p>
  149. <ol>
  150. <li>Overview – Tells the LLM the overview of the job it has to perform</li>
  151. <li>JSON Schema – Lists the JSON schema that the output must conform to.</li>
  152. <li>Examples – A few examples of user queries and the output response.</li>
  153. <li>Guidelines – A list of important rules to make sure the LLM generates the expected output.</li>
  154. </ol>
  155. <p>The LLM returns a text based response which we can parse into instances of classes representing each node type. </p>
  156. <h2 class="wp-block-heading">Processing the tree</h2>
  157. <p>At this point, processing the tree is fairly straightforward. Each node of the tree is processed from the leaves to the root. Named locations like “Washington DC” are converted into a Polygon using a database of known areas. Other tree nodes that are spatial operations are processed using geospatial algorithms. The end result is a single geospatial area that can be converted to GeoJSON or other formats for use. </p>
  158. <h1 class="wp-block-heading">Natural Language Geocoding Challenges and Potential Solutions </h1>
  159. <p>I’ve encountered a few challenges with this approach, some of which I’ve been able to overcome and others that will take more time.</p>
  160. <h2 class="wp-block-heading">Disambiguating multiple responses in a geocoding API</h2>
  161. <p>Geocoding databases can contain multiple entries for the same item like “Mississippi River”. When you ask it for something, like “Mississippi River”, there are multiple results that come back and only one of them refers to the entire river. You have to make sure to select the one that’s most likely going to refer to the area that the user wants.</p>
  162. <p>Other examples of ambiguities can be present in an area’s name such as Portland, Maine and Paris, Texas. If the user asks for an area which is ambiguous, such as a city like Paris or Portland, there are a few ways to determine what the user actually means. </p>
  163. <p>Firstly, context clues may provide an immediate resolution. For example if the user’s request is, “In Texas west of Paris” we can be confident that Paris, Texas is the city being referred to. If the request does not provide sufficient context, it may be possible to ask the user directly for clarification. Portland could refer to the city in Oregon or Maine. We can instruct the LLM to detect situations like this and tell us to prompt the user to clear the ambiguity. Finally, feedback may be available via visual maps. Pairing natural language understanding with visual tools, such as maps, is a clear way for users to confirm that their request was interpreted correctly. Because “Los Angeles” could mean the official boundary of Los Angeles or a greater Los Angeles metro area, a visual confirmation that the correct area was selected is a quick way to disambiguate and avoid confusion. </p>
  164. <h2 class="wp-block-heading">Missing areas</h2>
  165. <p>Geocoding APIs contain things like countries, cities, individual addresses, and some natural features like rivers. This being said, the natural features included in these APIs are not all inclusive. Other natural features or broad areas like “Rocky Mountains” or the “Congo River Basin Rainforest” typically won’t be in a Geocoding API.</p>
  166. <p>To remedy this problem, we’ll have to identify alternate sources for these kinds of areas. When it comes to coastlines specifically, I found a Natural Earth GeoJSON source containing all of the coastlines for the entire world. If a user requests an area like “California Coastline”, I can perform a spatial intersection of “California” plus a small buffer with the world coastline shape to extract the particular coastline of interest.</p>
  167. <p>Options like the <a href="https://github.com/martynafford/natural-earth-geojson" target="_blank" rel="noreferrer noopener">Natural Earth GeoJSON Github Repository</a> contain a large number of additional features like “Rocky Mountains”, but it is not a completely comprehensive resource. This being said, we haven’t found one single source that presents a full picture of this type of information. Further research and development is required on this topic, but a comprehensive understanding of natural features will likely require pulling from multiple sources like the repository referenced above and government websites.</p>
  168. <h1 class="wp-block-heading">Final Thoughts</h1>
  169. <p>Element 84 has a robust background in geospatial data discovery and analysis. Throughout our time in the space, we’ve worked to streamline the process for users to find, acquire, and use the geospatial data they need for various purposes. On a related note, we pride ourselves in our ability to maintain a current and up-to-date understanding of geospatial technologies, even as they evolve rapidly (and for a more comprehensive picture of our thoughts on up and coming technology, you can find more in our 2023 Geospatial Technology Radar). </p>
  170. <p>As we detail throughout this blog, the introduction of Natural Language Geocoding as a tool in identifying geospatial regions represents huge potential for both reducing the time and effort required by users while also increasing precision and accuracy throughout the process. In addition to these general benefits, Natural Language Geocoding has huge potential in a variety of specific applications including climate change analysis, healthcare, education, and housing policy. </p>
  171. <p>We’re working towards integrating natural language capabilities into geospatial analysis. At E84 we’re excited about anything we can do to shorten the distance and effort required to uncover answers to our world’s biggest questions, and increasing efficiency from the geospatial perspective is a large part of that. We’re working on <a href="https://element84.com/machine-learning/towards-a-queryable-earth-with-vision-language-foundation-models/" target="_blank" rel="noreferrer noopener">other innovative solutions</a> and we’d love to hear from you if you’re interested in learning more about anything we’ve outlined in this post. Do you have applications that would benefit from Natural Language Geocoding? Are there particular ways of phrasing spatial areas that might be unique to your domain or user base? – <a href="https://www.element84.com/who-we-are/contact-us/" target="_blank" rel="noreferrer noopener">connect with our team directly on our contact us page</a>!</p>
  172. <h1 class="wp-block-heading">Launching A New Geospatial AI Community</h1>
  173. <p>Like many others in our community, we appreciate openness and collaboration between groups. We’re passionate about the intersection of geospatial technology and artificial intelligence and want to work with other enthusiasts, professionals, and newcomers who feel the same. We want to actively encourage a collaborative and vibrant environment where members can share knowledge, discuss challenges, and explore innovative solutions within geospatial AI. As we begin to shape this new community, we invite individuals from all backgrounds to provide their input on how we can shape this community to best support learning, sharing, and innovation in applying AI/ML to geospatial workloads. If you’re interested in getting involved, <a href="https://forms.gle/s5bayqE3cnRoqMkV9" target="_blank" rel="noreferrer noopener">fill out this form to learn more, express your interest, and share your ideas</a>.</p>
  174. </article>
  175. <hr>
  176. <footer>
  177. <p>
  178. <a href="/david/" title="Aller à l’accueil"><svg class="icon icon-home">
  179. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-home"></use>
  180. </svg> Accueil</a> •
  181. <a href="/david/log/" title="Accès au flux RSS"><svg class="icon icon-rss2">
  182. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-rss2"></use>
  183. </svg> Suivre</a> •
  184. <a href="http://larlet.com" title="Go to my English profile" data-instant><svg class="icon icon-user-tie">
  185. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-user-tie"></use>
  186. </svg> Pro</a> •
  187. <a href="mailto:david%40larlet.fr" title="Envoyer un courriel"><svg class="icon icon-mail">
  188. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-mail"></use>
  189. </svg> Email</a> •
  190. <abbr class="nowrap" title="Hébergeur : Alwaysdata, 62 rue Tiquetonne 75002 Paris, +33184162340"><svg class="icon icon-hammer2">
  191. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-hammer2"></use>
  192. </svg> Légal</abbr>
  193. </p>
  194. <template id="theme-selector">
  195. <form>
  196. <fieldset>
  197. <legend><svg class="icon icon-brightness-contrast">
  198. <use xlink:href="/static/david/icons2/symbol-defs-2021-12.svg#icon-brightness-contrast"></use>
  199. </svg> Thème</legend>
  200. <label>
  201. <input type="radio" value="auto" name="chosen-color-scheme" checked> Auto
  202. </label>
  203. <label>
  204. <input type="radio" value="dark" name="chosen-color-scheme"> Foncé
  205. </label>
  206. <label>
  207. <input type="radio" value="light" name="chosen-color-scheme"> Clair
  208. </label>
  209. </fieldset>
  210. </form>
  211. </template>
  212. </footer>
  213. <script src="/static/david/js/instantpage-5.1.0.min.js" type="module"></script>
  214. <script>
  215. function loadThemeForm(templateName) {
  216. const themeSelectorTemplate = document.querySelector(templateName)
  217. const form = themeSelectorTemplate.content.firstElementChild
  218. themeSelectorTemplate.replaceWith(form)
  219. form.addEventListener('change', (e) => {
  220. const chosenColorScheme = e.target.value
  221. localStorage.setItem('theme', chosenColorScheme)
  222. toggleTheme(chosenColorScheme)
  223. })
  224. const selectedTheme = localStorage.getItem('theme')
  225. if (selectedTheme && selectedTheme !== 'undefined') {
  226. form.querySelector(`[value="${selectedTheme}"]`).checked = true
  227. }
  228. }
  229. const prefersColorSchemeDark = '(prefers-color-scheme: dark)'
  230. window.addEventListener('load', () => {
  231. let hasDarkRules = false
  232. for (const styleSheet of Array.from(document.styleSheets)) {
  233. let mediaRules = []
  234. for (const cssRule of styleSheet.cssRules) {
  235. if (cssRule.type !== CSSRule.MEDIA_RULE) {
  236. continue
  237. }
  238. // WARNING: Safari does not have/supports `conditionText`.
  239. if (cssRule.conditionText) {
  240. if (cssRule.conditionText !== prefersColorSchemeDark) {
  241. continue
  242. }
  243. } else {
  244. if (cssRule.cssText.startsWith(prefersColorSchemeDark)) {
  245. continue
  246. }
  247. }
  248. mediaRules = mediaRules.concat(Array.from(cssRule.cssRules))
  249. }
  250. // WARNING: do not try to insert a Rule to a styleSheet you are
  251. // currently iterating on, otherwise the browser will be stuck
  252. // in a infinite loop…
  253. for (const mediaRule of mediaRules) {
  254. styleSheet.insertRule(mediaRule.cssText)
  255. hasDarkRules = true
  256. }
  257. }
  258. if (hasDarkRules) {
  259. loadThemeForm('#theme-selector')
  260. }
  261. })
  262. </script>
  263. </body>
  264. </html>