diff options
Diffstat (limited to 'node_modules/linkify-it/lib/re.js')
-rw-r--r-- | node_modules/linkify-it/lib/re.js | 181 |
1 files changed, 181 insertions, 0 deletions
diff --git a/node_modules/linkify-it/lib/re.js b/node_modules/linkify-it/lib/re.js new file mode 100644 index 0000000..5f34a72 --- /dev/null +++ b/node_modules/linkify-it/lib/re.js @@ -0,0 +1,181 @@ +'use strict'; + + +module.exports = function (opts) { + var re = {}; + + // Use direct extract instead of `regenerate` to reduse browserified size + re.src_Any = require('uc.micro/properties/Any/regex').source; + re.src_Cc = require('uc.micro/categories/Cc/regex').source; + re.src_Z = require('uc.micro/categories/Z/regex').source; + re.src_P = require('uc.micro/categories/P/regex').source; + + // \p{\Z\P\Cc\CF} (white spaces + control + format + punctuation) + re.src_ZPCc = [ re.src_Z, re.src_P, re.src_Cc ].join('|'); + + // \p{\Z\Cc} (white spaces + control) + re.src_ZCc = [ re.src_Z, re.src_Cc ].join('|'); + + // Experimental. List of chars, completely prohibited in links + // because can separate it from other part of text + var text_separators = '[><\uff5c]'; + + // All possible word characters (everything without punctuation, spaces & controls) + // Defined via punctuation & spaces to save space + // Should be something like \p{\L\N\S\M} (\w but without `_`) + re.src_pseudo_letter = '(?:(?!' + text_separators + '|' + re.src_ZPCc + ')' + re.src_Any + ')'; + // The same as abothe but without [0-9] + // var src_pseudo_letter_non_d = '(?:(?![0-9]|' + src_ZPCc + ')' + src_Any + ')'; + + //////////////////////////////////////////////////////////////////////////////// + + re.src_ip4 = + + '(?:(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'; + + // Prohibit any of "@/[]()" in user/pass to avoid wrong domain fetch. + re.src_auth = '(?:(?:(?!' + re.src_ZCc + '|[@/\\[\\]()]).)+@)?'; + + re.src_port = + + '(?::(?:6(?:[0-4]\\d{3}|5(?:[0-4]\\d{2}|5(?:[0-2]\\d|3[0-5])))|[1-5]?\\d{1,4}))?'; + + re.src_host_terminator = + + '(?=$|' + text_separators + '|' + re.src_ZPCc + ')(?!-|_|:\\d|\\.-|\\.(?!$|' + re.src_ZPCc + '))'; + + re.src_path = + + '(?:' + + '[/?#]' + + '(?:' + + '(?!' + re.src_ZCc + '|' + text_separators + '|[()[\\]{}.,"\'?!\\-;]).|' + + '\\[(?:(?!' + re.src_ZCc + '|\\]).)*\\]|' + + '\\((?:(?!' + re.src_ZCc + '|[)]).)*\\)|' + + '\\{(?:(?!' + re.src_ZCc + '|[}]).)*\\}|' + + '\\"(?:(?!' + re.src_ZCc + '|["]).)+\\"|' + + "\\'(?:(?!" + re.src_ZCc + "|[']).)+\\'|" + + "\\'(?=" + re.src_pseudo_letter + '|[-]).|' + // allow `I'm_king` if no pair found + '\\.{2,}[a-zA-Z0-9%/&]|' + // google has many dots in "google search" links (#66, #81). + // github has ... in commit range links, + // Restrict to + // - english + // - percent-encoded + // - parts of file path + // - params separator + // until more examples found. + '\\.(?!' + re.src_ZCc + '|[.]).|' + + (opts && opts['---'] ? + '\\-(?!--(?:[^-]|$))(?:-*)|' // `---` => long dash, terminate + : + '\\-+|' + ) + + ',(?!' + re.src_ZCc + ').|' + // allow `,,,` in paths + ';(?!' + re.src_ZCc + ').|' + // allow `;` if not followed by space-like char + '\\!+(?!' + re.src_ZCc + '|[!]).|' + // allow `!!!` in paths, but not at the end + '\\?(?!' + re.src_ZCc + '|[?]).' + + ')+' + + '|\\/' + + ')?'; + + // Allow anything in markdown spec, forbid quote (") at the first position + // because emails enclosed in quotes are far more common + re.src_email_name = + + '[\\-;:&=\\+\\$,\\.a-zA-Z0-9_][\\-;:&=\\+\\$,\\"\\.a-zA-Z0-9_]*'; + + re.src_xn = + + 'xn--[a-z0-9\\-]{1,59}'; + + // More to read about domain names + // http://serverfault.com/questions/638260/ + + re.src_domain_root = + + // Allow letters & digits (http://test1) + '(?:' + + re.src_xn + + '|' + + re.src_pseudo_letter + '{1,63}' + + ')'; + + re.src_domain = + + '(?:' + + re.src_xn + + '|' + + '(?:' + re.src_pseudo_letter + ')' + + '|' + + '(?:' + re.src_pseudo_letter + '(?:-|' + re.src_pseudo_letter + '){0,61}' + re.src_pseudo_letter + ')' + + ')'; + + re.src_host = + + '(?:' + + // Don't need IP check, because digits are already allowed in normal domain names + // src_ip4 + + // '|' + + '(?:(?:(?:' + re.src_domain + ')\\.)*' + re.src_domain/*_root*/ + ')' + + ')'; + + re.tpl_host_fuzzy = + + '(?:' + + re.src_ip4 + + '|' + + '(?:(?:(?:' + re.src_domain + ')\\.)+(?:%TLDS%))' + + ')'; + + re.tpl_host_no_ip_fuzzy = + + '(?:(?:(?:' + re.src_domain + ')\\.)+(?:%TLDS%))'; + + re.src_host_strict = + + re.src_host + re.src_host_terminator; + + re.tpl_host_fuzzy_strict = + + re.tpl_host_fuzzy + re.src_host_terminator; + + re.src_host_port_strict = + + re.src_host + re.src_port + re.src_host_terminator; + + re.tpl_host_port_fuzzy_strict = + + re.tpl_host_fuzzy + re.src_port + re.src_host_terminator; + + re.tpl_host_port_no_ip_fuzzy_strict = + + re.tpl_host_no_ip_fuzzy + re.src_port + re.src_host_terminator; + + + //////////////////////////////////////////////////////////////////////////////// + // Main rules + + // Rude test fuzzy links by host, for quick deny + re.tpl_host_fuzzy_test = + + 'localhost|www\\.|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:' + re.src_ZPCc + '|>|$))'; + + re.tpl_email_fuzzy = + + '(^|' + text_separators + '|"|\\(|' + re.src_ZCc + ')' + + '(' + re.src_email_name + '@' + re.tpl_host_fuzzy_strict + ')'; + + re.tpl_link_fuzzy = + // Fuzzy link can't be prepended with .:/\- and non punctuation. + // but can start with > (markdown blockquote) + '(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|' + re.src_ZPCc + '))' + + '((?![$+<=>^`|\uff5c])' + re.tpl_host_port_fuzzy_strict + re.src_path + ')'; + + re.tpl_link_no_ip_fuzzy = + // Fuzzy link can't be prepended with .:/\- and non punctuation. + // but can start with > (markdown blockquote) + '(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|' + re.src_ZPCc + '))' + + '((?![$+<=>^`|\uff5c])' + re.tpl_host_port_no_ip_fuzzy_strict + re.src_path + ')'; + + return re; +}; |