Merge "Support for new gTLDs"
This commit is contained in:
@@ -28,7 +28,12 @@ public class Patterns {
|
||||
* List accurate as of 2011/07/18. List taken from:
|
||||
* http://data.iana.org/TLD/tlds-alpha-by-domain.txt
|
||||
* This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
|
||||
*
|
||||
* @deprecated Due to the recent proliferation of gTLDs, this API is
|
||||
* expected to become out-of-date very quickly. Therefore it is now
|
||||
* deprecated.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String TOP_LEVEL_DOMAIN_STR =
|
||||
"((aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
|
||||
+ "|(biz|b[abdefghijmnorstvwyz])"
|
||||
@@ -59,7 +64,9 @@ public class Patterns {
|
||||
|
||||
/**
|
||||
* Regular expression pattern to match all IANA top-level domains.
|
||||
* @deprecated This API is deprecated. See {@link #TOP_LEVEL_DOMAIN_STR}.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Pattern TOP_LEVEL_DOMAIN =
|
||||
Pattern.compile(TOP_LEVEL_DOMAIN_STR);
|
||||
|
||||
@@ -68,7 +75,10 @@ public class Patterns {
|
||||
* List accurate as of 2011/07/18. List taken from:
|
||||
* http://data.iana.org/TLD/tlds-alpha-by-domain.txt
|
||||
* This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
|
||||
*
|
||||
* @deprecated This API is deprecated. See {@link #TOP_LEVEL_DOMAIN_STR}.
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL =
|
||||
"(?:"
|
||||
+ "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
|
||||
@@ -107,6 +117,24 @@ public class Patterns {
|
||||
public static final String GOOD_IRI_CHAR =
|
||||
"a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
|
||||
|
||||
public static final Pattern IP_ADDRESS
|
||||
= Pattern.compile(
|
||||
"((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
|
||||
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
|
||||
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
|
||||
+ "|[1-9][0-9]|[0-9]))");
|
||||
|
||||
/**
|
||||
* RFC 1035 Section 2.3.4 limits the labels to a maximum of 63 octets.
|
||||
*/
|
||||
private static final String IRI
|
||||
= "[" + GOOD_IRI_CHAR + "]([" + GOOD_IRI_CHAR + "\\-]{0,61}[" + GOOD_IRI_CHAR + "]){0,1}";
|
||||
|
||||
private static final String HOST_NAME = IRI + "(?:\\." + IRI + ")+";
|
||||
|
||||
public static final Pattern DOMAIN_NAME
|
||||
= Pattern.compile("(" + HOST_NAME + "|" + IP_ADDRESS + ")");
|
||||
|
||||
/**
|
||||
* Regular expression pattern to match most parts of RFC 3987
|
||||
* Internationalized URLs, aka IRIs. Commonly used Unicode characters are
|
||||
@@ -116,13 +144,7 @@ public class Patterns {
|
||||
"((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
|
||||
+ "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
|
||||
+ "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
|
||||
+ "((?:(?:[" + GOOD_IRI_CHAR + "][" + GOOD_IRI_CHAR + "\\-]{0,64}\\.)+" // named host
|
||||
+ TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL
|
||||
+ "|(?:(?:25[0-5]|2[0-4]" // or ip address
|
||||
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
|
||||
+ "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
|
||||
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
|
||||
+ "|[1-9][0-9]|[0-9])))"
|
||||
+ "(?:" + DOMAIN_NAME + ")"
|
||||
+ "(?:\\:\\d{1,5})?)" // plus option port number
|
||||
+ "(\\/(?:(?:[" + GOOD_IRI_CHAR + "\\;\\/\\?\\:\\@\\&\\=\\#\\~" // plus option query params
|
||||
+ "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
|
||||
@@ -130,19 +152,6 @@ public class Patterns {
|
||||
// input. This is to stop foo.sure from
|
||||
// matching as foo.su
|
||||
|
||||
public static final Pattern IP_ADDRESS
|
||||
= Pattern.compile(
|
||||
"((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
|
||||
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
|
||||
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
|
||||
+ "|[1-9][0-9]|[0-9]))");
|
||||
|
||||
public static final Pattern DOMAIN_NAME
|
||||
= Pattern.compile(
|
||||
"(((([" + GOOD_IRI_CHAR + "][" + GOOD_IRI_CHAR + "\\-]*)*[" + GOOD_IRI_CHAR + "]\\.)+"
|
||||
+ TOP_LEVEL_DOMAIN + ")|"
|
||||
+ IP_ADDRESS + ")");
|
||||
|
||||
public static final Pattern EMAIL_ADDRESS
|
||||
= Pattern.compile(
|
||||
"[a-zA-Z0-9\\+\\.\\_\\%\\-\\+]{1,256}" +
|
||||
@@ -159,7 +168,7 @@ public class Patterns {
|
||||
* might be phone numbers in arbitrary text, not for validating whether
|
||||
* something is in fact a phone number. It will miss many things that
|
||||
* are legitimate phone numbers.
|
||||
*
|
||||
*
|
||||
* <p> The pattern matches the following:
|
||||
* <ul>
|
||||
* <li>Optionally, a + sign followed immediately by one or more digits. Spaces, dots, or dashes
|
||||
|
||||
Reference in New Issue
Block a user