123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263 |
- import re
-
-
# Unicode NO-BREAK SPACE, written as an escape so it can never be mistaken
# for (or silently converted into) an ordinary space by an editor or a paste.
_NBSP = "\u00a0"

# Compiled once at import time rather than on every call.
# Only groups 1 and 2 are captured; they are the text kept on each side of
# the whitespace that gets replaced.
_WIDONT_FINDER = re.compile(
    r"""
    ((?:</?(?:a|em|span|strong|i|b|mark)[^>]*>)|[^<>\s\.])  # must be preceded by an approved inline opening or closing tag or a nontag/nonspace
    \s+                                        # the space to replace
    ([^<>\s]+                                  # must be followed by non-tag non-space characters
    \s*                                        # optional white space!
    (?:</(?:a|em|span|strong|i|b|mark)>\s*)*   # optional closing inline tags with optional white space after each
    (?:</(?:p|h[1-6]|li|dt|dd)>|$))            # end with a closing p, h1-6, li, dt, dd or the end of the string
    """,
    re.VERBOSE,
)


def widont(text: str) -> str:
    r"""Join the last two words of a text with a no-break space (U+00A0).

    The run of whitespace before the last word is replaced by a single
    U+00A0 character so that the last word never wraps onto a line of its
    own. The replacement happens at the end of the string and before the
    closing tag of these block elements: ``h1-h6, p, li, dd, dt``. Closing
    inline elements ``a, em, strong, span, b, i, mark`` sitting between the
    last word and the end of the block are skipped over.

    Extracted from:
    https://github.com/mintchaos/typogrify/blob/
    20f693cbbb232ebc27733d9f1721a2cf1e7b25e3/typogrify/filters.py#L315-L368

    Args:
        text: Plain text or an HTML fragment.

    Returns:
        ``text`` with the matching whitespace runs replaced by U+00A0
        (shown as ``\xa0`` in the examples below). Text without a match is
        returned unchanged.

    >>> widont('A very simple test')
    'A very simple\xa0test'

    Single word items shouldn't be changed

    >>> widont('Test')
    'Test'
    >>> widont(' Test')
    ' Test'
    >>> widont('<ul><li>Test</p></li><ul>')
    '<ul><li>Test</p></li><ul>'
    >>> widont('<ul><li> Test</p></li><ul>')
    '<ul><li> Test</p></li><ul>'
    >>> widont('<p>In a couple of paragraphs</p><p>paragraph two</p>')
    '<p>In a couple of\xa0paragraphs</p><p>paragraph\xa0two</p>'
    >>> widont('<h1><a href="#">In a link inside a heading</i> </a></h1>')
    '<h1><a href="#">In a link inside a\xa0heading</i> </a></h1>'
    >>> widont('<h1><a href="#">In a link</a> followed by other text</h1>')
    '<h1><a href="#">In a link</a> followed by other\xa0text</h1>'

    Empty HTMLs shouldn't error

    >>> widont('<h1><a href="#"></a></h1>')
    '<h1><a href="#"></a></h1>'
    >>> widont('<div>Divs get no love!</div>')
    '<div>Divs get no love!</div>'
    >>> widont('<pre>Neither do PREs</pre>')
    '<pre>Neither do PREs</pre>'
    >>> widont('<div><p>But divs with paragraphs do!</p></div>')
    '<div><p>But divs with paragraphs\xa0do!</p></div>'

    Adaptations:
    * add the mark element as a potential inline
    * avoid insertion of a nbsp if the sentence is a single word
      (the character before the whitespace must not be a period)

    >>> widont("<p>Avec <mark>mon ami Marc.</mark></p>")
    '<p>Avec <mark>mon ami\xa0Marc.</mark></p>'
    >>> widont("Vraiment. Bien.")
    'Vraiment. Bien.'
    """
    # A callable replacement is used because re.sub() templates do not accept
    # \u escapes; both groups always take part in a match, so neither is None.
    return _WIDONT_FINDER.sub(
        lambda match: match.group(1) + _NBSP + match.group(2), text
    )
|