Zeile 1: |
Zeile 1: |
− | --[=[ URLutil 2013-04-17 | + | local URLutil = { suite = "URLutil", |
| + | serial = "2020-07-01", |
| + | item = 10859193 } |
| + | --[=[ |
| Utilities for URL etc. on www. | | Utilities for URL etc. on www. |
| + | * decode() |
| + | * encode() |
| * getAuthority() | | * getAuthority() |
| + | * getFragment() |
| * getHost() | | * getHost() |
| + | * getLocation() |
| + | * getNormalized() |
| + | * getPath() |
| * getPort() | | * getPort() |
| + | * getQuery() |
| + | * getQueryTable() |
| + | * getRelativePath() |
| * getScheme() | | * getScheme() |
| + | * getSortkey() |
| + | * getTLD() |
| + | * getTop2domain() |
| + | * getTop3domain() |
| * isAuthority() | | * isAuthority() |
| * isDomain() | | * isDomain() |
| + | * isDomainExample() |
| + | * isDomainInt() |
| * isHost() | | * isHost() |
| + | * isHostPathResource() |
| * isIP() | | * isIP() |
| + | * isIPlocal() |
| * isIPv4() | | * isIPv4() |
| * isIPv6() | | * isIPv6() |
| * isMailAddress() | | * isMailAddress() |
| * isMailLink() | | * isMailLink() |
− | * isProtocolWiki | + | * isProtocolDialog() |
− | * isRessourceURL() | + | * isProtocolWiki() |
| + | * isResourceURL() |
| * isSuspiciousURL() | | * isSuspiciousURL() |
| * isUnescapedURL() | | * isUnescapedURL() |
| * isWebURL() | | * isWebURL() |
− | Only [[dotted decimal]] notation for IPv4 supported. | + | * wikiEscapeURL() |
| + | * failsafe() |
| + | Only [[dotted decimal]] notation for IPv4 expected. |
| Does not support dotted hexadecimal, dotted octal, or single-number formats. | | Does not support dotted hexadecimal, dotted octal, or single-number formats. |
| IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway. | | IPv6 URL (bracketed) not yet implemented; might need Wikintax escaping anyway. |
| + | ]=] |
| + | local Failsafe = URLutil |
| + | |
| + | |
| + | |
-- Characters that must stay percent-encoded when a URL component is
-- decoded; one list per component key as used by the callers in this file:
--     F -- fragment
--     P -- path
--     Q -- query
--     X -- any component (used for numeric character references)
-- The closing bracket is written plainly: "\]" is not a defined escape
-- sequence and is rejected by Lua 5.2 and later.
local decodeComponentProtect = { F = "\"#%<>[]^`{|}",
                                 P = "\"#%<>[]^`{|}/?",
                                 Q = "\"#%<>[]^`{|}&=+;,",
                                 X = "\"#%<>[]^`{|}&=+;,/?" }
| + | |
| + | |
| + | |
local decodeComponentEscape = function ( averse, adapt )
    -- Decide whether a character has to stay percent-encoded
    -- Precondition:
    --     averse  -- string, key into decodeComponentProtect (F, P, Q, X)
    --     adapt   -- number, character code
    -- Postcondition:
    --     Returns truthy value, if character shall be kept escaped
    -- Space is code 32 (0x20); the former comparison against decimal 20
    -- never matched a space and let "%20" be decoded.
    return  adapt == 32  or  adapt == 127  or
            decodeComponentProtect[ averse ]:find( string.char( adapt ),
                                                   1,
                                                   true )
end -- decodeComponentEscape()
| + | |
| + | |
| + | |
local decodeComponentML = function ( ask )
    -- Resolve numeric character references (&#65; &#x41;) within URL
    -- Precondition:
    --     ask  -- string, with URL
    -- Postcondition:
    --     Returns string; references below 128 are replaced by the
    --     character, or by %XX if the character needs to stay escaped;
    --     references >= 128 are rewritten as decimal reference
    -- The "x" is optional in the pattern: previously it was mandatory,
    -- which left the decimal branch below unreachable.
    local i = 1
    local j, n, s
    while i do
        i = ask:find( "&#[xX]?%x%x+;", i )
        if i then
            j = ask:find( ";", i + 3, true )
            s = ask:sub( i + 2,  j - 1 ):upper()
            if s:byte( 1, 1 ) == 88 then    -- 'X': hexadecimal
                n = tonumber( s:sub( 2 ),  16 )
            elseif s:match( "^%d+$" ) then  -- decimal
                n = tonumber( s )
            else
                -- hex digits without leading x: not a valid reference
                n = false
            end
            if n then
                if n >= 128 then
                    s = string.format( "&#%d;", n )
                elseif decodeComponentEscape( "X", n ) then
                    s = string.format( "%%%02X", n )
                else
                    s = string.format( "%c", n )
                end
                ask = ask:sub( 1,  i - 1 )  ..  s  ..  ask:sub( j + 1 )
            end
            i = i + 1
        end
    end -- while i
    return ask
end -- decodeComponentML()
| + | |
| + | |
| + | |
local decodeComponentPercent = function ( ask, averse )
    -- Decode %XX sequences (%20 ... %7F) which do not need escaping
    -- Precondition:
    --     ask     -- string, with URL component
    --     averse  -- string, component key for decodeComponentEscape()
    -- Postcondition:
    --     Returns string; sequences that stay escaped get their second
    --     hex digit in upper case
    local start = 1
    while start do
        start = ask:find( "%%[2-7]%x", start )
        if start then
            local high  = start + 1    -- index of first hex digit
            local low   = high + 1     -- index of second hex digit
            local rest  = low + 1      -- index behind the sequence
            local code  = ask:byte( low, low )
            local upper = false
            if code > 96 then
                -- lower case hex letter: remember upper case code
                code  = code - 32
                upper = code
            end
            if code > 57 then
                code = code - 55       -- 'A'...'F'
            else
                code = code - 48       -- '0'...'9'
            end
            code = ( ask:byte( high, high ) - 48 ) * 16  +  code
            if decodeComponentEscape( averse, code ) then
                if upper then
                    ask = string.format( "%s%c%s",
                                         ask:sub( 1, high ),
                                         upper,
                                         ask:sub( rest ) )
                end
            else
                ask = string.format( "%s%c%s",
                                     ask:sub( 1,  start - 1 ),
                                     code,
                                     ask:sub( rest ) )
            end
            start = high
        end
    end -- while start
    return ask
end -- decodeComponentPercent()
| + | |
| + | |
| + | |
local getTopDomain = function ( url, mode )
    -- Retrieve the last 2 or 3 labels of the host name
    -- Precondition:
    --     url   -- string, with URL
    --     mode  -- number, 3 for three labels, else two labels
    -- Postcondition:
    --     Returns string with domain, or false
    local host = URLutil.getHost( url )
    if not host then
        return false
    end
    local pattern
    if mode == 3 then
        pattern = "%.([%w%%%-]+%.[%w%%%-]+%.%a[%w%-]*%a)$"
    else
        pattern = "%.([%w%%%-]+%.%a[%w%-]*%a)$"
    end
    -- leading dot ensures that complete labels are matched
    return mw.ustring.match( "." .. host,  pattern )  or  false
end -- getTopDomain()
| + | |
| + | |
| + | |
local getHash = function ( url )
    -- Find the position of the '#' that starts the fragment
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns number, index of first '#' which is not part of a
    --             numeric character reference (&#65; &#x41; &#X41;)
    --             nil, if there is no such '#'
    -- Each '#' is examined from left to right, so a genuine '#' is found
    -- even if a character reference follows later within the fragment.
    local start = 1
    while true do
        local r = url:find( "#", start, true )
        if not r then
            return nil
        end
        if r > 1  and  url:byte( r - 1 ) == 38 then    -- preceded by '&'
            local s = url:sub( r + 1 )
            if s:match( "^%d+;" )  or  s:match( "^[xX]%x+;" ) then
                -- character reference; continue behind this '#'
                start = r + 1
            else
                return r
            end
        else
            return r
        end
    end -- while true
end -- getHash()
| + | |
| + | |
| + | |
URLutil.decode = function ( url, enctype )
    -- Percent-decode a string and make the result safe for wikitext
    -- Precondition:
    --     url      -- string, to be decoded
    --     enctype  -- string, with encoding type for mw.uri.decode()
    --                 or empty or not a string
    -- Postcondition:
    --     Returns string; HTML encoded by mw.text.encode(), and
    --     [ | ] replaced by numeric character references
    local r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.text.encode( mw.uri.decode( url, s ) )
    if r:find( "[%[|%]]" ) then
        -- Replacing a bracket by itself had no effect;
        -- the wikitext-safe character references are required here.
        r = r:gsub( "%[", "&#91;" )
             :gsub( "|",  "&#124;" )
             :gsub( "%]", "&#93;" )
    end
    return r
end -- URLutil.decode()
| + | |
| | | |
− | Functions are not "local",
| |
− | so other modules can require this module and call them directly.
| |
− | We return an object with small stub functions to call the real ones
| |
− | so that the functions can be called from templates also.
| |
− | ----
| |
− | Based upon w:en:Special:Permalink/542839577?title=Module:IPAddress 2013-03-01
| |
− | Unit tests at :en:Module:IPAddress/tests
| |
− | ]=]
| |
| | | |
URLutil.encode = function ( url, enctype )
    -- Percent-encode a string, also with regard to wikitext
    -- Precondition:
    --     url      -- string, to be encoded
    --     enctype  -- string, with encoding type for mw.uri.encode()
    --                 or empty or not a string
    -- Postcondition:
    --     Returns string; a leading * : ; is percent-encoded as well
    --     (a leading # is deliberately left alone), and so is any [ | ]
    local k, r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.uri.encode( url, s )
    k = r:byte( 1, 1 )
    if k == 42  or      -- *
       k == 58  or      -- :
       k == 59 then     -- ;
        r = string.format( "%%%X%s",  k,  r:sub( 2 ) )
    end
    if r:find( "[%[|%]]" ) then
        -- In a gsub() replacement string "%5" is a capture reference;
        -- a literal percent sign needs to be written as "%%".
        r = r:gsub( "%[", "%%5B" )
             :gsub( "|",  "%%7C" )
             :gsub( "%]", "%%5D" )
    end
    return r
end -- URLutil.encode()
| | | |
| | | |
| | | |
URLutil.getAuthority = function ( url )
    -- Retrieve host and optional port from URL
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, lower case host, with ":port" if present
    --             nil,    if string without valid authority
    --             false,  if url is not a string
    if type( url ) ~= "string" then
        return false
    end
    local r
    local s   = mw.text.decode( url )
    local cut = s:find( "#", 6, true )
    if cut then
        -- drop fragment
        s = s:sub( 1,  cut - 1 )
    end
    -- appended slash terminates an authority without path
    local host, colon, port =
        mw.ustring.match( s .. "/",
                          "^%s*%w*:?//([%w%.%%_-]+)(:?)([%d]*)/" )
    if URLutil.isHost( host ) then
        host = mw.ustring.lower( host )
        if colon == ":" then
            if port:find( "^[1-9]" ) then
                r = ( host .. ":" .. port )
            end
        elseif #port == 0 then
            r = host
        end
    end
    return r
end -- URLutil.getAuthority()
| + | |
| + | |
| + | |
URLutil.getFragment = function ( url, decode )
    -- Retrieve fragment (behind '#') from URL
    -- Precondition:
    --     url     -- string, with URL
    --     decode  -- string, "%" for percent decoding,
    --                "WIKI" for .XX and underscore notation; or else
    -- Postcondition:
    --     Returns string, fragment without '#'
    --             false,  if no fragment
    --             nil,    if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local at = getHash( url )
    if not at then
        return false
    end
    local r = mw.text.trim( url:sub( at ) ):sub( 2 )
    if type( decode ) == "string" then
        local encoding = mw.text.trim( decode )
        if encoding == "WIKI" then
            -- convert .XX into %XX, underscore into space
            r = r:gsub( "%.(%x%x)", "%%%1" )
                 :gsub( "_", " " )
        end
        if encoding == "%"  or  encoding == "WIKI" then
            r = mw.uri.decode( r, "PATH" )
        end
    end
    return r
end -- URLutil.getFragment()
| + | |
| + | |
| + | |
URLutil.getHost = function ( url )
    -- Retrieve host name (authority without port) from URL
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string with host, or the falsy result
    --     of URLutil.getAuthority()
    local authority = URLutil.getAuthority( url )
    return authority  and
           mw.ustring.match( authority, "^([%w%.%%_%-]+):?[%d]*$" )
end -- URLutil.getHost()
| + | |
| + | |
| + | |
URLutil.getLocation = function ( url )
    -- Retrieve URL without fragment
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, trimmed URL up to but excluding '#'
    --             false,  if empty or starting with '#'
    --             nil,    if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local r = mw.text.trim( url )
    if r == "" then
        return false
    end
    local at = getHash( r )
    if at == 1 then
        return false
    elseif at then
        r = r:sub( 1,  at - 1 )
    end
    return r
end -- URLutil.getLocation()
| + | |
| + | |
| + | |
URLutil.getNormalized = function ( url )
    -- Normalize URL: resolve character references, decode unnecessary
    --                percent escapes, lower case scheme and authority,
    --                escape characters which conflict with wikitext
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, or false if empty or not a string
    local r
    if type( url ) == "string" then
        r = mw.text.trim( url )
        if r == "" then
            r = false
        else
            r = decodeComponentML( r )
        end
    else
        r = false
    end
    if r then
        -- k: begin of "//"; without it only the final escaping applies
        local k = r:find( "//", 1, true )
        if k then
            -- j: first slash behind "//"
            local j = r:find( "/",  k + 2,  true )
            -- fragment, path, query components
            local sF, sP, sQ
            if r:find( "%%[2-7]%x" ) then
                -- percent escapes present: split off the components
                -- from right to left and decode each with its own rules
                local i = getHash( r )
                if i then
                    sF = r:sub( i + 1 )
                    r  = r:sub( 1,  i - 1 )
                    if sF == "" then
                        -- empty fragment is discarded
                        sF = false
                    else
                        sF = decodeComponentPercent( sF, "F" )
                    end
                end
                i = r:find( "?", 1, true )
                if i then
                    -- sQ keeps the leading '?'
                    sQ = r:sub( i )
                    r  = r:sub( 1,  i - 1 )
                    sQ = decodeComponentPercent( sQ, "Q" )
                end
                if j then
                    if #r > j then
                        sP = r:sub( j + 1 )
                        sP = decodeComponentPercent( sP, "P" )
                    end
                    r = r:sub( 1,  j - 1 )
                end
            elseif j then
                -- no percent escapes: everything behind slash j is kept
                -- verbatim as sP, including any query and fragment
                local n = #r
                if r:byte( n, n ) == 35 then    -- '#'
                    -- drop a trailing empty fragment
                    n = n - 1
                    r = r:sub( 1, n )
                end
                if n > j then
                    sP = r:sub( j + 1 )
                end
                r = r:sub( 1,  j - 1 )
            end
            -- NOTE(review): if there is no slash behind the authority,
            -- r still contains any query/fragment here and gets lower
            -- cased as a whole -- confirm whether this is intended.
            r = mw.ustring.lower( r ) .. "/"
            if sP then
                r = r .. sP
            end
            if sQ then
                r = r .. sQ
            end
            if sF then
                r = string.format( "%s#%s", r, sF )
            end
        end
        -- percent-encode space and characters special in wikitext
        r = r:gsub( " ", "%%20" )
             :gsub( "%[", "%%5B" )
             :gsub( "|", "%%7C" )
             :gsub( "%]", "%%5D" )
             :gsub( "%<", "%%3C" )
             :gsub( "%>", "%%3E" )
    end
    return r
end -- URLutil.getNormalized()
| + | |
| + | |
| + | |
URLutil.getPath = function ( url )
    -- Retrieve path from URL, without query and fragment
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string with path, or the falsy result
    --     of URLutil.getRelativePath()
    local r = URLutil.getRelativePath( url )
    if r then
        -- cut at first '?', then at first '#'
        r = r:match( "^([^%?]*)%?" )  or  r
        r = r:match( "^([^#]*)#" )  or  r
    end
    return r
end -- URLutil.getPath()
| + | |
| + | |
| + | |
URLutil.getPort = function ( url )
    -- Retrieve port number from URL
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns number, if port is specified
    --             false,  if authority without port
    --             falsy result of URLutil.getAuthority() otherwise
    local authority = URLutil.getAuthority( url )
    if not authority then
        return authority
    end
    local digits = authority:match( ":([1-9][0-9]*)$" )
    if digits then
        return ( tonumber( digits ) )
    end
    return false
end -- URLutil.getPort()
| + | |
| + | |
| + | |
URLutil.getQuery = function ( url, key, separator )
    -- Retrieve query string, or the value of one query parameter
    -- Precondition:
    --     url        -- string, with URL
    --     key        -- string, with parameter name; or not a string
    --     separator  -- string, one of & ; , /  -- default: &
    -- Postcondition:
    --     Returns string, entire query (without '?') if no key,
    --                     else value assigned to key
    --             false,  if no query or key not found
    --             falsy result of URLutil.getLocation() otherwise
    local r = URLutil.getLocation( url )
    if r then
        r = r:match( "^[^%?]*%?(.+)$" )
        if r  and  type( key ) == "string" then
            -- The trimmed key is used now (it was computed but ignored),
            -- and punctuation is escaped, since the key becomes part
            -- of a Lua pattern.
            local single = mw.text.trim( key ):gsub( "%p", "%%%0" )
            local sep    = "&"
            if type( separator ) == "string" then
                local s = mw.text.trim( separator )
                if s:match( "^[&;,/]$" ) then
                    sep = s
                end
            end
            -- surrounding separators permit uniform matching
            r = string.format( "%s%s%s", sep, r, sep )
                      :match( string.format( "%s%s=([^%s]*)%s",
                                             sep, single, sep, sep ) )
        end
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getQuery()
| | | |
| | | |
| | | |
URLutil.getQueryTable = function ( url, separator )
    -- Retrieve query parameters as table
    -- Precondition:
    --     url        -- string, with URL
    --     separator  -- string, one of & ; , /  -- default: &
    -- Postcondition:
    --     Returns table, mapping each name to its value string,
    --                    or to false if no '=' follows the name
    --             falsy result of URLutil.getQuery() otherwise
    local r = URLutil.getQuery( url )
    if r then
        local sep = "&"
        if type( separator ) == "string" then
            local s = mw.text.trim( separator )
            if s:match( "^[&;,/]$" ) then
                sep = s
            end
        end
        local assigned = { }
        for _, item in ipairs( mw.text.split( r, sep, true ) ) do
            if item:find( "=", 2, true ) then
                local name, value = item:match( "^([^=]+)=(.*)$" )
                if name then
                    assigned[ name ] = value
                end
            else
                assigned[ item ] = false
            end
        end -- for _, item
        r = assigned
    end
    return r
end -- URLutil.getQueryTable()
| | | |
| | | |
| | | |
URLutil.getRelativePath = function ( url )
    -- Retrieve everything behind the authority, starting with '/'
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, trimmed path, with query and fragment
    --                     "/" if resource URL without path
    --             false,  if not available
    --             nil,    if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local s = url:match( "^%s*[a-zA-Z]*://(.*)$" )
    if s then
        -- skip authority
        s = s:match( "[^/]+(/.*)$" )
    else
        local lead
        lead, s = url:match( "^%s*(/?)(/.*)$" )
        if lead == "/" then
            -- protocol-relative: skip authority
            s = s:match( "/[^/]+(/.*)$" )
        end
    end
    if s then
        return ( mw.text.trim( s ) )
    end
    if URLutil.isResourceURL( url ) then
        return "/"
    end
    return false
end -- URLutil.getRelativePath()
| | | |
| | | |
| | | |
URLutil.getScheme = function ( url )
    -- Retrieve scheme from URL
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, lower case scheme with "://" appended,
    --                     or "//" if protocol-relative
    --             false,  if no appropriate scheme
    --             nil,    if url is not a string
    if type( url ) ~= "string" then
        return nil
    end
    local prot, colon, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
    if slashes ~= "//" then
        return false
    end
    if colon == ":" then
        -- scheme needs at least three letters
        if #prot > 2 then
            return prot:lower() .. "://"
        end
    elseif #prot == 0 then
        return "//"
    end
    return false
end -- URLutil.getScheme()
| + | |
| + | |
| + | |
URLutil.getSortkey = function ( url )
    -- Build a sortkey: host labels reversed, then port, scheme, path
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string, "///tld.domain.host PORT scheme: path"
    --             url itself, if not a string or not an URL with "//"
    local r = url
    if type( url ) == "string" then
        local i = url:find( "//", 1, true )
        if i then
            local scheme
            -- string.find() is 1-based, so the former test for i == 0
            -- never succeeded; detect protocol-relative URL explicitly.
            if url:match( "^%s*//" ) then
                scheme = ""
            else
                scheme = url:match( "^%s*([a-zA-Z]*)://" )
            end
            if scheme then
                local s      = url:sub( i + 2 )
                local suffix = ""
                local port   = 0
                local comps, site, digits
                scheme = scheme:lower()
                i = s:find( "/", 1, true )
                if i  and  i > 1 then
                    suffix = s:sub( i + 1 ):gsub( "#", " " )
                    s      = s:sub( 1,  i - 1 )
                end
                -- Capture digits only and convert them: passing the
                -- string ":8080" to "%d" raised an error.
                site, digits = s:match( "^(.+):(%d+)$" )
                if digits then
                    port = tonumber( digits )
                else
                    site = s
                end
                comps = mw.text.split( site:lower(), ".", true )
                r     = "///"
                for k = #comps, 2, -1 do
                    r = string.format( "%s%s.", r, comps[ k ] )
                end -- for --k
                r = string.format( "%s%s %d %s: %s",
                                   r, comps[ 1 ], port, scheme, suffix )
            end
        end
    end
    return r
end -- URLutil.getSortkey()
| + | |
| + | |
| + | |
URLutil.getTLD = function ( url )
    -- Retrieve top level domain from URL
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string with TLD
    --             false, if host does not end with a TLD
    --             falsy result of URLutil.getHost() otherwise
    local host = URLutil.getHost( url )
    if not host then
        return host
    end
    return mw.ustring.match( host, "%w+%.(%a[%w%-]*%a)$" )  or  false
end -- URLutil.getTLD()
| + | |
| + | |
| + | |
function URLutil.getTop2domain( url )
    -- Retrieve the last two labels of the host name
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string with domain, or false
    return getTopDomain( url, 2 )
end -- URLutil.getTop2domain()
| + | |
| + | |
| + | |
function URLutil.getTop3domain( url )
    -- Retrieve the last three labels of the host name
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string with domain, or false
    return getTopDomain( url, 3 )
end -- URLutil.getTop3domain()
| | | |
| | | |
| | | |
URLutil.isAuthority = function ( s )
    -- Is this a valid authority (host with optional port)?
    -- Precondition:
    --     s  -- string, with host and optional ":port"
    -- Postcondition:
    --     Returns truthy result of URLutil.isHost(), if valid
    --             false,  if invalid
    --             nil,    if s is not a string
    local r
    if type( s ) == "string" then
        local pattern = "^%s*([%w%.%%_-]+)(:?)(%d*)%s*$"
        local host, colon, port = mw.ustring.match( s, pattern )
        if not host then
            r = false
        elseif colon == ":" then
            -- port is required behind colon, without leading zero
            r = ( port:match( "^[1-9][0-9]*$" ) ~= nil )
        else
            r = ( port == "" )
        end
        -- The host is checked only if the port was accepted; formerly
        -- a rejected port was overwritten by the result for the host.
        if r then
            r = URLutil.isHost( host )
        end
    else
        r = nil
    end
    return r
end -- URLutil.isAuthority()
| | | |
| | | |
| | | |
URLutil.isDomain = function ( s )
    -- Is this a syntactically valid domain name?
    -- Precondition:
    --     s  -- string, with domain
    -- Postcondition:
    --     Returns true,   if valid
    --             false,  if two adjacent dots within
    --             nil,    otherwise
    local r
    if type( s ) == "string" then
        -- first capture: everything before the top level domain
        local sub = mw.ustring.match( s,
                               "^%s*([%w%.%%_-]*%w)%.(%a[%w-]*%a)%s*$" )
        if type( sub ) == "string"  and
           mw.ustring.find( sub, "^%w" ) then
            r = not mw.ustring.find( sub, "..", 1, true )
        end
    end
    return r
end -- URLutil.isDomain()
| + | |
| + | |
| + | |
URLutil.isDomainExample = function ( url )
    -- Is this an example domain: example.com .edu .net .org ?
    -- NOTE(review): the original comment cites RFC 2606 for all four;
    --               example.edu may be reserved elsewhere -- confirm.
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns true or false; false if no domain available
    local r = getTopDomain( url, 2 )
    if r then
        local tld = r:lower():match( "^example%.([a-z][a-z][a-z])$" )
        r = ( tld == "com"  or
              tld == "edu"  or
              tld == "net"  or
              tld == "org" )
    end
    return r
end -- URLutil.isDomainExample()
| + | |
| + | |
| + | |
URLutil.isDomainInt = function ( url )
    -- Is this an Internationalized Domain Name (Punycode or non-ASCII)?
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns true,  if a label starts with "xn--"
    --                    or host contains other than printable ASCII
    --             false, if plain ASCII host
    --             falsy result of URLutil.getHost() otherwise
    local r = URLutil.getHost( url )
    if r then
        if r:match( "^[!-~]+$" ) then
            -- leading dot: match "xn--" at begin of any label
            r = ( ( "." .. r ):find( ".xn--", 1, true )  ~=  nil )
        else
            r = true
        end
    end
    return r
end -- URLutil.isDomainInt()
| + | |
| + | |
| + | |
function URLutil.isHost( s )
    -- Is this a domain name or an IP address?
    -- Precondition:
    --     s  -- string, with host
    -- Postcondition:
    --     Returns truthy result of URLutil.isDomain(),
    --             else result of URLutil.isIP()
    local r = URLutil.isDomain( s )
    if not r then
        r = URLutil.isIP( s )
    end
    return r
end -- URLutil.isHost()
| + | |
| | | |
| | | |
function URLutil.isHostPathResource( s )
    -- Is this a resource URL, even if the scheme has been omitted?
    -- Precondition:
    --     s  -- string, with URL or host and path
    -- Postcondition:
    --     Returns result of URLutil.isResourceURL(); on failure for
    --     the retry with "//" prepended
    local r = URLutil.isResourceURL( s )
    if s  and  not r then
        r = URLutil.isResourceURL( "//" .. mw.text.trim( s ) )
    end
    return r
end -- URLutil.isHostPathResource()
| | | |
− | function _isHost( s )
| |
− | return _isDomain( s ) or _isIP( s )
| |
− | end -- _isHost()
| |
| | | |
| | | |
function URLutil.isIP( s )
    -- Is this an IP address?
    -- Precondition:
    --     s  -- string, with address
    -- Postcondition:
    --     Returns 4 for IPv4, 6 for IPv6,
    --             else falsy result of URLutil.isIPv6()
    if URLutil.isIPv4( s ) then
        return 4
    end
    return URLutil.isIPv6( s )  and  6
end -- URLutil.isIP()
| | | |
− | function _isIP( s ) | + | |
− | return _isIPv4( s ) and 4 or _isIPv6( s ) and 6 | + | |
URLutil.isIPlocal = function ( s )
    -- Is this a local or private IPv4 address?
    --     0.*.*.*   10.*.*.*   127.*.*.*   169.254.*.*
    --     172.(16...31).*.*   192.168.*.*
    -- (the original comment cites RFC 1918, RFC 1122, RFC 5735)
    -- Precondition:
    --     s  -- string, with IPv4 address
    -- Postcondition:
    --     Returns true or false; false if s is not a string
    local r = false
    if type( s ) == "string" then
        local num = s:match( "^ *([01][0-9]*)%." )
        if num then
            num = tonumber( num )
            if num == 0 then
                r = s:match( "^ *0+%.[0-9]+%.[0-9]+%.[0-9]+ *$" )
            elseif num == 10  or  num == 127 then
                -- private network; loopback
                r = URLutil.isIPv4( s )
            elseif num == 169  or  num == 172  or  num == 192 then
                -- The second octet decides. The 192 branch formerly
                -- compared an undefined variable and was never entered;
                -- the 169 branch was empty.
                local sub = s:match( "^ *[0-9]+%.([0-9]+)%." )
                if sub then
                    sub = tonumber( sub )
                    if ( num == 169  and  sub == 254 )  or
                       ( num == 172  and  sub >= 16  and  sub <= 31 )  or
                       ( num == 192  and  sub == 168 ) then
                        r = URLutil.isIPv4( s )
                    end
                end
            end
        end
    end
    return r  and  true  or  false
end -- URLutil.isIPlocal()
| | | |
| | | |
| | | |
URLutil.isIPv4 = function ( s )
    -- Is this an IPv4 address in dotted decimal notation?
    -- Precondition:
    --     s  -- string, with address
    -- Postcondition:
    --     Returns true, if four octets < 256, first without leading 0
    --             false, otherwise
    if type( s ) ~= "string" then
        return false
    end
    local octets = { s:match( "^%s*([1-9][0-9]?[0-9]?)%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%s*$" ) }
    if #octets ~= 4 then
        return false
    end
    for i = 1, 4 do
        if tonumber( octets[ i ] ) >= 256 then
            return false
        end
    end -- for i
    return true
end -- URLutil.isIPv4()
| | | |
| | | |
| | | |
− | function _isIPv6( s ) | + | URLutil.isIPv6 = function ( s ) |
| local dcolon, groups | | local dcolon, groups |
| if type( s ) ~= "string" | | if type( s ) ~= "string" |
Zeile 178: |
Zeile 815: |
| ( dcolon == 0 and groups == 8 ) ) | | ( dcolon == 0 and groups == 8 ) ) |
| and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with :: | | and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with :: |
− | end -- _isIPv6() | + | end -- URLutil.isIPv6() |
| | | |
| | | |
| | | |
URLutil.isMailAddress = function ( s )
    -- Is this an e-mail address (local@domain)?
    -- Precondition:
    --     s  -- string, with address
    -- Postcondition:
    --     Returns result of URLutil.isDomain() for the domain part
    --             false, if s is not a string
    if type( s ) ~= "string" then
        return false
    end
    local domain = mw.ustring.match( s,
                                 "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
    return URLutil.isDomain( domain )
end -- URLutil.isMailAddress()
| | | |
| | | |
| | | |
URLutil.isMailLink = function ( s )
    -- Is this a mailto: link with a valid e-mail address?
    -- Precondition:
    --     s  -- string, with link
    -- Postcondition:
    --     Returns result of URLutil.isMailAddress() for the address
    --             false, if not a string or not a mailto: link
    if type( s ) == "string" then
        local scheme, addr = mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
        if type( scheme ) == "string"  and
           scheme:lower() == "mailto" then
            return URLutil.isMailAddress( addr )
        end
    end
    return false
end -- URLutil.isMailLink()
| | | |
| | | |
| | | |
− | function _isProtocolWiki( prot ) | + | local function isProtocolAccepted( prot, supplied ) |
| if type( prot ) == "string" then | | if type( prot ) == "string" then |
| local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" ) | | local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" ) |
Zeile 227: |
Zeile 853: |
| end | | end |
| elseif colon == ":" or slashes == "" then | | elseif colon == ":" or slashes == "" then |
− | local s = " ftp git http https irc ircs mms nntp svn telnet worldwind " | + | local s = supplied:match( " " .. scheme:lower() .. " " ) |
− | s = s:match( " " .. scheme:lower() .. " " )
| |
| if type( s ) == "string" then | | if type( s ) == "string" then |
− | return true
| + | return true |
| end | | end |
| end | | end |
Zeile 236: |
Zeile 861: |
| end | | end |
| return false | | return false |
− | end -- _isProtocolWiki() | + | end -- isProtocolAccepted() |
| | | |
| | | |
| | | |
function URLutil.isProtocolDialog( prot )
    -- Is this one of: mailto irc ircs ssh telnet ?
    -- Precondition:
    --     prot  -- string, with protocol
    -- Postcondition:
    --     Returns result of isProtocolAccepted()
    local supported = " mailto irc ircs ssh telnet "
    return isProtocolAccepted( prot, supported )
end -- URLutil.isProtocolDialog()
| + | |
| + | |
| + | |
function URLutil.isProtocolWiki( prot )
    -- Is this one of: ftp ftps git http https nntp sftp svn worldwind ?
    -- Precondition:
    --     prot  -- string, with protocol
    -- Postcondition:
    --     Returns result of isProtocolAccepted()
    local supported = " ftp ftps git http https nntp sftp svn worldwind "
    return isProtocolAccepted( prot, supported )
end -- URLutil.isProtocolWiki()
| + | |
| + | |
| + | |
| + | URLutil.isResourceURL = function ( url ) |
| + | local scheme = URLutil.getScheme( url ) |
| if scheme then | | if scheme then |
− | local s = " // http:// https:// ftp:// " | + | local s = " // http:// https:// ftp:// sftp:// " |
− | s = s:find( " " .. scheme .. " " ) | + | s = s:find( string.format( " %s ", scheme ) ) |
| if s then | | if s then |
− | if _getAuthority( url ) then | + | if URLutil.getAuthority( url ) then |
| if not url:match( "%S%s+%S" ) then | | if not url:match( "%S%s+%S" ) then |
− | return true | + | local s1, s2 = url:match( "^([^#]+)(#.*)$" ) |
| + | if s2 then |
| + | if url:match( "^%s*[a-zA-Z]*:?//(.+)/" ) then |
| + | return true |
| + | end |
| + | else |
| + | return true |
| + | end |
| end | | end |
| end | | end |
Zeile 254: |
Zeile 899: |
| end | | end |
| return false | | return false |
− | end -- _isRessourceURL() | + | end -- URLutil.isResourceURL() |
| | | |
| | | |
| | | |
− | function _isSuspiciousURL( url ) | + | URLutil.isSuspiciousURL = function ( url ) |
− | if _isRessourceURL( url ) then | + | if URLutil.isResourceURL( url ) then |
− | local s = _getAuthority( url ) | + | local s = URLutil.getAuthority( url ) |
| local pat = "[%[|%]" .. | | local pat = "[%[|%]" .. |
− | mw.ustring.char( 8201, 45, 8207, 8234, 45, 8239, 8288 ) | + | mw.ustring.char( 34, |
| + | 8201, 45, 8207, |
| + | 8234, 45, 8239, |
| + | 8288 ) |
| .. "]" | | .. "]" |
| if s:find( "@" ) | | if s:find( "@" ) |
Zeile 270: |
Zeile 918: |
| return true | | return true |
| end | | end |
− | -- TODO zero width character | + | -- TODO zero width character ?? |
| return false | | return false |
| end | | end |
| return true | | return true |
− | end -- _isSuspiciousURL() | + | end -- URLutil.isSuspiciousURL() |
| | | |
| | | |
| | | |
− | function _isUnescapedURL( url, trailing ) | + | URLutil.isUnescapedURL = function ( url, trailing ) |
| if type( trailing ) ~= "string" then | | if type( trailing ) ~= "string" then |
− | if _isWebURL( url ) then | + | if URLutil.isWebURL( url ) then |
| if url:match( "[%[|%]]" ) then | | if url:match( "[%[|%]]" ) then |
| return true | | return true |
Zeile 287: |
Zeile 935: |
| end | | end |
| return false | | return false |
− | end -- _isUnescapedURL() | + | end -- URLutil.isUnescapedURL() |
| | | |
| | | |
| | | |
URLutil.isWebURL = function ( url )
    -- Is this an URL with scheme and authority, free of inner
    -- whitespace and of '' (two apostrophes)?
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns true or false
    local r = false
    if URLutil.getScheme( url )  and  URLutil.getAuthority( url ) then
        r = not ( url:find( "%S%s+%S" )  or
                  url:find( "''", 1, true ) )
    end
    return r
end -- URLutil.isWebURL()
| + | |
| + | |
| + | |
URLutil.wikiEscapeURL = function ( url )
    -- Escape [ | ] by numeric character references for wikitext
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string with escaped URL;
    --             url unchanged, if not a string
    if type( url ) == "string"  and  url:find( "[%[|%]]" ) then
        -- Replacing a character by itself had no effect;
        -- the character references are required here.
        url = url:gsub( "%[", "&#91;" )
                 :gsub( "|",  "&#124;" )
                 :gsub( "%]", "&#93;" )
    end
    return url
end -- URLutil.wikiEscapeURL()
| + | |
| + | |
| | | |
Failsafe.failsafe = function ( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --     atleast  -- string, with required version or "wikidata" or "~"
    --                 or false
    -- Postcondition:
    --     Returns  string  -- with queried version, also if problem
    --              false   -- if appropriate
    local last  = ( atleast == "~" )
    local since = atleast
    local r
    if last  or  since == "wikidata" then
        local item = Failsafe.item
        local entity
        since = false
        if type( item ) == "number"  and  item > 0 then
            entity = mw.wikibase.getEntity( string.format( "Q%d",
                                                           item ) )
        end
        if type( entity ) == "table" then
            local seek = Failsafe.serialProperty or "P348"
            local vsn  = entity:formatPropertyValues( seek )
            if type( vsn ) == "table"  and
               type( vsn.value ) == "string"  and
               vsn.value ~= "" then
                if last  and  vsn.value == Failsafe.serial then
                    -- "~": registered version equals this code
                    r = false
                else
                    r = vsn.value
                end
            end
        end
    end
    if r == nil then
        -- no answer from repository: compare with local serial
        if not since  or  since <= Failsafe.serial then
            r = Failsafe.serial
        else
            r = false
        end
    end
    return r
end -- Failsafe.failsafe()
| + | |
| + | |
| + | |
local function Template( frame, action, amount )
    -- Run actual code from template transclusion
    -- Precondition:
    --     frame   -- object
    --     action  -- string, with function name
    --     amount  -- number, of args if > 1
    -- Postcondition:
    --     Return string or not
    local v = { }
    for i = 1,  amount or 1 do
        local s = frame.args[ i ]
        if s then
            s = mw.text.trim( s )
            if s ~= "" then
                -- empty parameters are treated like missing ones
                v[ i ] = s
            end
        end
    end -- for i
    if v[ 1 ] then
        return ( URLutil[ action ]( v[ 1 ], v[ 2 ], v[ 3 ] ) )
    end
    return nil
end -- Template()
| | | |
| | | |
− | -- Provide template access
| |
| | | |
| local p = {} | | local p = {} |
| | | |
function p.decode( frame )
    -- Template access to URLutil.decode() with up to 2 parameters;
    -- "" if there is no result
    local r = Template( frame, "decode", 2 )
    return r or ""
end
function p.encode( frame )
    -- Template access to URLutil.encode() with up to 2 parameters;
    -- "" if there is no result
    local r = Template( frame, "encode", 2 )
    return r or ""
end
function p.getAuthority( frame )
    -- Template access to URLutil.getAuthority() with parameter 1;
    -- "" if there is no result
    local r = Template( frame, "getAuthority" )
    return r or ""
end
function p.getFragment( frame )
    -- Template access to URLutil.getFragment() with up to 2 parameters
    -- Postcondition:
    --     Returns the result preceded by "#", or "" if there is none
    local fragment = Template( frame, "getFragment", 2 )
    if not fragment then
        return ""
    end
    return "#" .. fragment
end
function p.getHost( frame )
    -- Template access to URLutil.getHost() with parameter 1;
    -- "" if there is no result
    local r = Template( frame, "getHost" )
    return r or ""
end
function p.getLocation( frame )
    -- Template access to URLutil.getLocation() with parameter 1;
    -- "" if there is no result
    local r = Template( frame, "getLocation" )
    return r or ""
end
function p.getNormalized( frame )
    -- Template access to URLutil.getNormalized() with parameter 1;
    -- "" if there is no result
    local r = Template( frame, "getNormalized" )
    return r or ""
end
function p.getPath( frame )
    -- Template access to URLutil.getPath() with parameter 1;
    -- "" if there is no result
    local r = Template( frame, "getPath" )
    return r or ""
end
function p.getPort( frame )
    -- Template access to URLutil.getPort() with parameter 1;
    -- "" if there is no result
    local r = Template( frame, "getPort" )
    return r or ""
end
function p.getQuery( frame )
    -- Template access to URLutil.getQuery() with up to 3 parameters
    -- Postcondition:
    --     Returns "" if there is no result;
    --     the plain result if parameter 2 is non-blank;
    --     otherwise the result preceded by "?"
    local query = Template( frame, "getQuery", 3 )
    if not query then
        return ""
    end
    local key = frame.args[ 2 ]
    if key and mw.text.trim( key ) ~= "" then
        -- Parameter 2 was supplied: hand the result back without "?"
        return query
    end
    return "?" .. query
end
function p.getRelativePath( frame )
    -- Template access to URLutil.getRelativePath() with parameter 1;
    -- "" if there is no result
    local r = Template( frame, "getRelativePath" )
    return r or ""
end
function p.getScheme( frame )
    -- Template access to URLutil.getScheme() with parameter 1;
    -- "" if there is no result
    local r = Template( frame, "getScheme" )
    return r or ""
end
function p.getSortkey( frame )
    -- Template access to URLutil.getSortkey() with parameter 1;
    -- "" if there is no result
    local r = Template( frame, "getSortkey" )
    return r or ""
end
function p.getTLD( frame )
    -- Template access to URLutil.getTLD() with parameter 1;
    -- "" if there is no result
    local r = Template( frame, "getTLD" )
    return r or ""
end
function p.getTop2domain( frame )
    -- Template access to URLutil.getTop2domain() with parameter 1;
    -- "" if there is no result
    local r = Template( frame, "getTop2domain" )
    return r or ""
end
function p.getTop3domain( frame )
    -- Template access to URLutil.getTop3domain() with parameter 1;
    -- "" if there is no result
    local r = Template( frame, "getTop3domain" )
    return r or ""
end
function p.isAuthority( frame )
    -- Template access to URLutil.isAuthority() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isAuthority" ) then
        return "1"
    end
    return ""
end
function p.isDomain( frame )
    -- Template access to URLutil.isDomain() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isDomain" ) then
        return "1"
    end
    return ""
end
function p.isDomainExample( frame )
    -- Template access to URLutil.isDomainExample() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isDomainExample" ) then
        return "1"
    end
    return ""
end
function p.isDomainInt( frame )
    -- Template access to URLutil.isDomainInt() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isDomainInt" ) then
        return "1"
    end
    return ""
end
function p.isHost( frame )
    -- Template access to URLutil.isHost() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isHost" ) then
        return "1"
    end
    return ""
end
function p.isHostPathResource( frame )
    -- Template access to URLutil.isHostPathResource() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isHostPathResource" ) then
        return "1"
    end
    return ""
end
function p.isIP( frame )
    -- Template access to URLutil.isIP() with parameter 1;
    -- the result itself is handed back (not "1"), or "" if none
    local r = Template( frame, "isIP" )
    return r or ""
end
function p.isIPlocal( frame )
    -- Template access to URLutil.isIPlocal() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isIPlocal" ) then
        return "1"
    end
    return ""
end
function p.isIPv4( frame )
    -- Template access to URLutil.isIPv4() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isIPv4" ) then
        return "1"
    end
    return ""
end
function p.isIPv6( frame )
    -- Template access to URLutil.isIPv6() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isIPv6" ) then
        return "1"
    end
    return ""
end
function p.isMailAddress( frame )
    -- Template access to URLutil.isMailAddress() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isMailAddress" ) then
        return "1"
    end
    return ""
end
function p.isMailLink( frame )
    -- Template access to URLutil.isMailLink() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isMailLink" ) then
        return "1"
    end
    return ""
end
function p.isProtocolDialog( frame )
    -- Template access to URLutil.isProtocolDialog() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isProtocolDialog" ) then
        return "1"
    end
    return ""
end
function p.isProtocolWiki( frame )
    -- Template access to URLutil.isProtocolWiki() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isProtocolWiki" ) then
        return "1"
    end
    return ""
end
function p.isResourceURL( frame )
    -- Template access to URLutil.isResourceURL() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isResourceURL" ) then
        return "1"
    end
    return ""
end
function p.isSuspiciousURL( frame )
    -- Template access to URLutil.isSuspiciousURL() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isSuspiciousURL" ) then
        return "1"
    end
    return ""
end
function p.isUnescapedURL( frame )
    -- Template access to URLutil.isUnescapedURL() with up to 2
    -- parameters; "1" if the result is truthy, else ""
    if Template( frame, "isUnescapedURL", 2 ) then
        return "1"
    end
    return ""
end
function p.isWebURL( frame )
    -- Template access to URLutil.isWebURL() with parameter 1;
    -- "1" if the result is truthy, else ""
    if Template( frame, "isWebURL" ) then
        return "1"
    end
    return ""
end
function p.wikiEscapeURL( frame )
    -- Template access to URLutil.wikiEscapeURL() with parameter 1
    -- Postcondition:
    --     Returns the result, or "" if there is none, so that this
    --     wrapper yields a string like the other template functions
    return Template( frame, "wikiEscapeURL" ) or ""
end
p.failsafe = function ( frame )
    -- Versioning interface
    -- Precondition:
    --     frame  -- object, whose args[ 1 ] holds the requirement,
    --               or string, with the requirement itself,
    --               or anything else for no requirement
    -- Postcondition:
    --     Returns result of Failsafe.failsafe(), or "" if that is false
    local kind = type( frame )
    local since
    if kind == "table" then
        -- A table handed over by Lua code might lack args
        if type( frame.args ) == "table" then
            since = frame.args[ 1 ]
        end
    elseif kind == "string" then
        since = frame
    end
    if type( since ) == "string" then
        since = mw.text.trim( since )
        if since == "" then
            since = false
        end
    elseif since ~= nil then
        -- Non-string requirement cannot be trimmed nor compared
        -- against the serial string; treat as no requirement
        since = false
    end
    return Failsafe.failsafe( since ) or ""
end
function p.URLutil()
    -- Library access: hands out the URLutil table itself
    local library = URLutil
    return library
end
| | | |
| return p | | return p |