Parse IPv6 URIs (#3841).
This commit is contained in:
parent
4040d5f647
commit
ca08c27eef
|
@ -254,6 +254,39 @@ public class Patterns {
|
||||||
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
|
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
|
||||||
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
|
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
|
||||||
+ "|[1-9][0-9]|[0-9]))");
|
+ "|[1-9][0-9]|[0-9]))");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Matches an IPv6 address literal enclosed in square brackets, e.g. {@code [::1]}.
* The expression contains no anchors, so it can match inside a longer string.
|
||||||
|
* <p>The following address forms are covered:
* <ul>
* <li>full (uncompressed) IPv6 addresses</li>
|
||||||
|
* <li>zero compressed IPv6 addresses (section 2.2 of rfc5952)</li>
|
||||||
|
* <li>link-local IPv6 addresses with zone index (section 11 of rfc4007)</li>
|
||||||
|
* <li>IPv4-Embedded IPv6 Address (section 2 of rfc6052)</li>
|
||||||
|
* <li>IPv4-mapped IPv6 addresses (section 2.1 of rfc2765)</li>
|
||||||
|
* <li>IPv4-translated addresses (section 2.1 of rfc2765)</li>
* </ul>
|
||||||
|
*
|
||||||
|
* Taken from https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses/17871737#17871737
* and wrapped in a literal {@code [} ... {@code ]} pair.
|
||||||
|
*/
|
||||||
|
public static final Pattern IP6_ADDRESS
|
||||||
|
= Pattern.compile(
|
||||||
|
"\\[" + // literal opening bracket
|
||||||
|
"(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|" + // eight hex groups, no compression
|
||||||
|
"([0-9a-fA-F]{1,4}:){1,7}:|" + // one to seven groups followed by a trailing "::"
|
||||||
|
"([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|" +
|
||||||
|
"([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|" +
|
||||||
|
"([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|" +
|
||||||
|
"([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|" +
|
||||||
|
"([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|" +
|
||||||
|
"[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|" +
|
||||||
|
":((:[0-9a-fA-F]{1,4}){1,7}|:)|" + // leading "::" followed by up to seven groups, or "::" alone
|
||||||
|
"fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|" + // fe80 prefix, then '%' and an alphanumeric zone index
|
||||||
|
"::(ffff(:0{1,4}){0,1}:){0,1}" + // "::", optionally followed by "ffff:" or "ffff:0:", then a dotted quad
|
||||||
|
"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}" +
|
||||||
|
"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|" +
|
||||||
|
"([0-9a-fA-F]{1,4}:){1,4}:" + // one to four hex groups, "::", then a dotted quad
|
||||||
|
"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}" +
|
||||||
|
"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))" +
|
||||||
|
"\\]" // literal closing bracket
|
||||||
|
);
|
||||||
/**
|
/**
|
||||||
* Valid UCS characters defined in RFC 3987. Excludes space characters.
|
* Valid UCS characters defined in RFC 3987. Excludes space characters.
|
||||||
*/
|
*/
|
||||||
|
@ -296,7 +329,7 @@ public class Patterns {
|
||||||
private static final String TLD = "(" + PUNYCODE_TLD + "|" + "[" + TLD_CHAR + "]{2,63}" +")";
|
private static final String TLD = "(" + PUNYCODE_TLD + "|" + "[" + TLD_CHAR + "]{2,63}" +")";
|
||||||
private static final String HOST_NAME = "(" + IRI_LABEL + "\\.)+" + TLD;
|
private static final String HOST_NAME = "(" + IRI_LABEL + "\\.)+" + TLD;
|
||||||
public static final Pattern DOMAIN_NAME
|
public static final Pattern DOMAIN_NAME
|
||||||
= Pattern.compile("(" + HOST_NAME + "|" + IP_ADDRESS + ")");
|
= Pattern.compile("(" + HOST_NAME + "|" + IP6_ADDRESS + "|" + IP_ADDRESS +")");
|
||||||
private static final String PROTOCOL = "(?i:http|https|rtsp):\\/\\/";
|
private static final String PROTOCOL = "(?i:http|https|rtsp):\\/\\/";
|
||||||
/* A word boundary or end of input. This is to stop foo.sure from matching as foo.su */
|
/* A word boundary or end of input. This is to stop foo.sure from matching as foo.su */
|
||||||
private static final String WORD_BOUNDARY = "(?:\\b|$|^)";
|
private static final String WORD_BOUNDARY = "(?:\\b|$|^)";
|
||||||
|
@ -341,12 +374,12 @@ public class Patterns {
|
||||||
* {@link #IP_ADDRESS}
|
* {@link #IP_ADDRESS}
|
||||||
*/
|
*/
|
||||||
private static final Pattern STRICT_DOMAIN_NAME
|
private static final Pattern STRICT_DOMAIN_NAME
|
||||||
= Pattern.compile("(?:" + STRICT_HOST_NAME + "|" + IP_ADDRESS + ")");
|
= Pattern.compile("(?:" + STRICT_HOST_NAME + "|" + IP_ADDRESS + "|" + IP6_ADDRESS + ")");
|
||||||
/**
|
/**
|
||||||
* Regular expression that matches domain names without a TLD
|
* Regular expression that matches domain names without a TLD
|
||||||
*/
|
*/
|
||||||
private static final String RELAXED_DOMAIN_NAME =
|
private static final String RELAXED_DOMAIN_NAME =
|
||||||
"(?:" + "(?:" + IRI_LABEL + "(?:\\.(?=\\S))" +"?)+" + "|" + IP_ADDRESS + ")";
|
"(?:" + "(?:" + IRI_LABEL + "(?:\\.(?=\\S))" +"?)+" + "|" + IP_ADDRESS + "|" + IP6_ADDRESS + ")";
|
||||||
/**
|
/**
|
||||||
* Regular expression to match strings that do not start with a supported protocol. The TLDs
|
* Regular expression to match strings that do not start with a supported protocol. The TLDs
|
||||||
* are expected to be one of the known TLDs.
|
* are expected to be one of the known TLDs.
|
||||||
|
|
Loading…
Reference in a new issue