diff options
author | Yorhel <git@yorhel.nl> | 2016-10-16 09:11:13 +0200 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2016-10-16 09:11:15 +0200 |
commit | 17fc298217ca5a3ef65652d8e36adb3a2720cef7 (patch) | |
tree | 584ee88e70041a24a972caf10b26a0fb427388f9 | |
parent | 7d31f41ba811f707ce4dca44518a5d07a5828f4c (diff) |
Fix handling of URLs ending in a ⟩
I've known about this issue before, but didn't realize it was so
widespread. This fixes many links.
-rw-r--r-- | lib/ManUtils/Build.PL | 1 |
-rw-r--r-- | lib/ManUtils/ManUtils.xs | 17 |
2 files changed, 15 insertions, 3 deletions
diff --git a/lib/ManUtils/Build.PL b/lib/ManUtils/Build.PL index 5d4abfe..35c453c 100644 --- a/lib/ManUtils/Build.PL +++ b/lib/ManUtils/Build.PL @@ -5,6 +5,7 @@ use Module::Build; Module::Build->new( dist_name => 'ManUtils', dist_version_from => 'ManUtils.pm', + dist_abstract => 'Utils for manned.org', pm_files => { 'ManUtils.pm' => 'lib/ManUtils.pm', }, diff --git a/lib/ManUtils/ManUtils.xs b/lib/ManUtils/ManUtils.xs index 8335e22..2e370d0 100644 --- a/lib/ManUtils/ManUtils.xs +++ b/lib/ManUtils/ManUtils.xs @@ -169,9 +169,8 @@ static void flushline(ctx_t *x) { // HTTP(s) URL. // This is just a simple q{https?://[^ ][.,;"\)>]?( |$)} match, doesn't - // always work right: - // - troff.1: ⟨http://www.gnu.org/copyleft/fdl.html⟩. <- yes, that's an Unicode character. - // - roff.7: Has quite a few issues with wrapped URLs and situations similar to the above. + // always work right, e.g.: + // - https://manned.org/spu_run/414316a1 -> URL wrapped to new line // Note: Don't use strncmp() before manually checking for 'http'. The parse // time is otherwise increased by a factor 2. if(s[0] == 'h' && s[1] == 't' && s[2] == 't' && s[3] == 'p' && (strncmp(s, "http://", 7) == 0 || strncmp(s, "https://", 8) == 0)) { @@ -189,6 +188,18 @@ static void flushline(ctx_t *x) { endchr = *sp; *(sp--) = 0; } + // Also catch a Unicode '⟩', which is how groff sometimes ends a .UR, e.g.: + // - https://manned.org/troff/c4467840 + // - https://manned.org/pass/78413b49 + // - https://manned.org/empathy-accounts/8c05b2c1 + // - https://manned.org/urn/8cb83e85 + // - https://manned.org/wine/4a699a22 + if(*sp == '\xa9' && *(sp-1) == '\x9f' && *(sp-2) == '\xe2') { + sp[1] = endchr; + sp -= 3; + endchr = sp[1]; + sp[1] = 0; + } sv_catpvf(x->dest, "<a href=\"%s\" rel=\"nofollow\">%s</a>", s, s); *(++sp) = endchr; es = s = sp; |