From 41760b40ef96d2dfd708ee652150b2eb9676b776 Mon Sep 17 00:00:00 2001 From: klonfish <1299450+klonfish@users.noreply.github.com> Date: Thu, 10 May 2018 18:29:36 +0200 Subject: [PATCH] Improved URL matching URLs with trailing special chars (e.g. slash, minus, ...) should now be matched correctly, even when followed by a non-url char --- .../ui/adapter/MessageAdapter.java | 19 ++++++++++++++++++- .../siacs/conversations/utils/Patterns.java | 2 -- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/main/java/eu/siacs/conversations/ui/adapter/MessageAdapter.java b/src/main/java/eu/siacs/conversations/ui/adapter/MessageAdapter.java index 8b77e8ba3..05580d307 100644 --- a/src/main/java/eu/siacs/conversations/ui/adapter/MessageAdapter.java +++ b/src/main/java/eu/siacs/conversations/ui/adapter/MessageAdapter.java @@ -123,7 +123,24 @@ public class MessageAdapter extends ArrayAdapter implements CopyTextVie } } - private static final Linkify.MatchFilter WEBURL_MATCH_FILTER = (cs, start, end) -> start < 1 || (cs.charAt(start - 1) != '@' && cs.charAt(start - 1) != '.' && !cs.subSequence(Math.max(0, start - 3), start).equals("://")); + private static final Linkify.MatchFilter WEBURL_MATCH_FILTER = (cs, start, end) -> { + if (start > 0) { + if (cs.charAt(start - 1) == '@' || cs.charAt(start - 1) == '.' 
+ || cs.subSequence(Math.max(0, start - 3), start).equals("://")) { + return false; + } + } + + if (end < cs.length()) { + // Reject strings that were probably matched only because they contain a dot followed by + // some known TLD (see also comment for WORD_BOUNDARY in Patterns.java) + if (Character.isAlphabetic(cs.charAt(end-1)) && Character.isAlphabetic(cs.charAt(end))) { + return false; + } + } + + return true; + }; private static final Linkify.MatchFilter XMPPURI_MATCH_FILTER = (s, start, end) -> { XmppUri uri = new XmppUri(s.subSequence(start, end).toString()); diff --git a/src/main/java/eu/siacs/conversations/utils/Patterns.java b/src/main/java/eu/siacs/conversations/utils/Patterns.java index 6e9e85cd7..fae13aaea 100644 --- a/src/main/java/eu/siacs/conversations/utils/Patterns.java +++ b/src/main/java/eu/siacs/conversations/utils/Patterns.java @@ -353,7 +353,6 @@ public class Patterns { + "(?:" + PORT_NUMBER + ")?" + ")" + "(?:" + PATH_AND_QUERY + ")?" - + WORD_BOUNDARY + ")"; /** * Regular expression to match strings that start with a supported protocol. Rules for domain @@ -367,7 +366,6 @@ public class Patterns { + "(?:" + PORT_NUMBER + ")?" + ")" + "(?:" + PATH_AND_QUERY + ")?" - + WORD_BOUNDARY + ")"; /** * Regular expression pattern to match IRIs. If a string starts with http(s):// the expression