|
|
@@ -25,7 +25,7 @@ RE_ESPACE_FINE_INSECABLE = regex.compile( |
|
|
|
assemble_regexes( |
|
|
|
build_regex(r"\w?", r"[;\?!]"), # Ponctuations doubles. |
|
|
|
build_regex( |
|
|
|
r"\d", r"([ghj]|min|sec|images|mm|hab|mg|L|km|°C|GHz)(\b|$)" |
|
|
|
r"\d", r"([ghj]|min|sec|images|mm|hab|kg|mg|L|km|°C|GHz)(\b|$)" |
|
|
|
), # Unités. |
|
|
|
build_regex(r"\d", r"(Mo|Ko|Go|Mb|Kb|Gb)(\b|$)"), # Tailles de fichiers. |
|
|
|
build_regex(r"\d", r"%"), # Pourcentages. |
|
|
@@ -44,9 +44,9 @@ def insere_espaces_fines_insecables(texte): |
|
|
|
RE_ESPACE_INSECABLE = regex.compile( |
|
|
|
assemble_regexes( |
|
|
|
build_regex(r"\w?", r":"), # Deux points. |
|
|
|
build_regex(r"«", r"\w?"), # Guillemets en chevrons. |
|
|
|
build_regex(r"«", ""), # Guillemets en chevrons. |
|
|
|
# "Po" est la catégorie "Punctuation, other". |
|
|
|
build_regex(r"[\w\p{Po}]?", r"»"), # Guillemets en chevrons. |
|
|
|
build_regex("", r"»"), # Guillemets en chevrons. |
|
|
|
build_regex(r"\d", r"(?!\d)\w"), # Chiffre suivi de lettres. |
|
|
|
build_regex(r"(M\.|Mme)", r"\w"), # Titres (Monsieur, Madame). |
|
|
|
) |