Zeile 1: |
Zeile 1: |
| local URLutil = { suite = "URLutil", | | local URLutil = { suite = "URLutil", |
− | serial = "2016-03-06" }; | + | serial = "2020-07-01", |
| + | item = 10859193 } |
| --[=[ | | --[=[ |
| Utilities for URL etc. on www. | | Utilities for URL etc. on www. |
| + | * decode() |
| + | * encode() |
| * getAuthority() | | * getAuthority() |
| * getFragment() | | * getFragment() |
Zeile 14: |
Zeile 17: |
| * getRelativePath() | | * getRelativePath() |
| * getScheme() | | * getScheme() |
| + | * getSortkey() |
| * getTLD() | | * getTLD() |
| * getTop2domain() | | * getTop2domain() |
Zeile 22: |
Zeile 26: |
| * isDomainInt() | | * isDomainInt() |
| * isHost() | | * isHost() |
| + | * isHostPathResource() |
| * isIP() | | * isIP() |
| * isIPlocal() | | * isIPlocal() |
Zeile 35: |
Zeile 40: |
| * isWebURL() | | * isWebURL() |
| * wikiEscapeURL() | | * wikiEscapeURL() |
− | Only [[dotted decimal]] notation for IPv4 supported. | + | * failsafe() |
| + | Only [[dotted decimal]] notation for IPv4 expected. |
| Does not support dotted hexadecimal, dotted octal, or single-number formats. | | Does not support dotted hexadecimal, dotted octal, or single-number formats. |
| IPv6 URL (bracketed) not yet implemented; might need Wikisyntax escaping anyway. | | IPv6 URL (bracketed) not yet implemented; might need Wikisyntax escaping anyway. |
| ]=] | | ]=] |
− | | + | local Failsafe = URLutil |
− | | |
− | | |
− | URLutil.getURIScheme = function ( uri )
| |
− | if type( uri ) == "string" then
| |
− | local prot, colon, slashes = uri:match( "^%s*([a-zA-Z]*)(:?)(/?/?)" )
| |
− | if #colon == 1 and #prot >= 2 then
| |
− | return prot:lower()
| |
− | elseif #slashes == 2 and #prot == 0 then
| |
− | return "//"
| |
− | end
| |
− | end
| |
− | return false
| |
− | end -- getURIScheme()
| |
| | | |
| | | |
Zeile 58: |
Zeile 51: |
| local decodeComponentProtect = { F = "\"#%<>[\]^`{|}", | | local decodeComponentProtect = { F = "\"#%<>[\]^`{|}", |
| P = "\"#%<>[\]^`{|}/?", | | P = "\"#%<>[\]^`{|}/?", |
− | Q = "\"#%<>[\]^`{|}&=+;", | + | Q = "\"#%<>[\]^`{|}&=+;,", |
− | X = "\"#%<>[\]^`{|}&=+;/?" } | + | X = "\"#%<>[\]^`{|}&=+;,/?" } |
| | | |
| | | |
Zeile 161: |
Zeile 154: |
| local r = URLutil.getHost( url ) | | local r = URLutil.getHost( url ) |
| if r then | | if r then |
− | local pattern = "[%w%%]+%.%a[%w-]*%a)$" | + | local pattern = "[%w%%%-]+%.%a[%w%-]*%a)$" |
| if mode == 3 then | | if mode == 3 then |
− | pattern = "[%w%%]+%." .. pattern | + | pattern = "[%w%%%-]+%." .. pattern |
| end | | end |
| r = mw.ustring.match( "." .. r, "%.(" .. pattern ) | | r = mw.ustring.match( "." .. r, "%.(" .. pattern ) |
Zeile 201: |
Zeile 194: |
| return r | | return r |
| end -- getHash() | | end -- getHash() |
| + | |
| + | |
| + | |
URLutil.decode = function ( url, enctype )
    -- Percent-decode a text and make the result safe for wikitext
    -- Precondition:
    --     url      -- string, percent-encoded text
    --     enctype  -- string or nil, decoding mode; trimmed and
    --                 upper-cased before being forwarded to
    --                 mw.uri.decode(); a blank string is forwarded
    --                 as false
    -- Postcondition:
    --     Returns string; decoded, then passed through
    --     mw.text.encode(), with any remaining "[", "|" and "]"
    --     written as numeric character references
    local r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.text.encode( mw.uri.decode( url, s ) )
    if r:find( "[%[|%]]" ) then
        -- The replacements must differ from the matched characters;
        -- substituting "[" for "[" (etc.) would leave link and table
        -- syntax active in the returned text.
        r = r:gsub( "%[", "&#91;" )
             :gsub( "|", "&#124;" )
             :gsub( "%]", "&#93;" )
    end
    return r
end -- URLutil.decode()
| + | |
| + | |
| + | |
URLutil.encode = function ( url, enctype )
    -- Percent-encode a text for use inside a URL in wikitext
    -- Precondition:
    --     url      -- string, text to be encoded
    --     enctype  -- string or nil, encoding mode; trimmed and
    --                 upper-cased before being forwarded to
    --                 mw.uri.encode(); a blank string is forwarded
    --                 as false
    -- Postcondition:
    --     Returns string; result of mw.uri.encode(), with a leading
    --     "*", ":" or ";" and any remaining "[", "|", "]" written
    --     as percent escapes
    local k, r, s
    if type( enctype ) == "string" then
        s = mw.text.trim( enctype )
        if s == "" then
            s = false
        else
            s = s:upper()
        end
    end
    r = mw.uri.encode( url, s )
    k = r:byte( 1, 1 )
    -- Only the first character is inspected here; presumably because
    -- these characters act as list or indent markup at the start of a
    -- wikitext line (TODO confirm). "#" (35) is deliberately not
    -- included.
    if k == 42 or       -- *
       k == 58 or       -- :
       k == 59 then     -- ;
        r = string.format( "%%%X%s", k, r:sub( 2 ) )
    end
    if r:find( "[%[|%]]" ) then
        -- "%" is the escape character in gsub() replacement strings,
        -- so a literal percent sign has to be written as "%%";
        -- a bare "%5B" would be read as a reference to capture 5.
        r = r:gsub( "%[", "%%5B" )
             :gsub( "|", "%%7C" )
             :gsub( "%]", "%%5D" )
    end
    return r
end -- URLutil.encode()
| | | |
| | | |
Zeile 269: |
Zeile 312: |
| local r = URLutil.getAuthority( url ) | | local r = URLutil.getAuthority( url ) |
| if r then | | if r then |
− | r = mw.ustring.match( r, "^([%w%.%%_-]+):?[%d]*$" ) | + | r = mw.ustring.match( r, "^([%w%.%%_%-]+):?[%d]*$" ) |
| end | | end |
| return r | | return r |
Zeile 340: |
Zeile 383: |
| sP = decodeComponentPercent( sP, "P" ) | | sP = decodeComponentPercent( sP, "P" ) |
| end | | end |
− | r = r:sub( 1, j - 1 ) | + | r = r:sub( 1, j - 1 ) |
| end | | end |
| elseif j then | | elseif j then |
Zeile 364: |
Zeile 407: |
| end | | end |
| end | | end |
− | r = r:gsub( "%[", "%%5B" ) | + | r = r:gsub( " ", "%%20" ) |
| + | :gsub( "%[", "%%5B" ) |
| :gsub( "|", "%%7C" ) | | :gsub( "|", "%%7C" ) |
| :gsub( "%]", "%%5D" ) | | :gsub( "%]", "%%5D" ) |
| + | :gsub( "%<", "%%3C" ) |
| + | :gsub( "%>", "%%3E" ) |
| end | | end |
| return r | | return r |
Zeile 514: |
Zeile 560: |
| return r | | return r |
| end -- URLutil.getScheme() | | end -- URLutil.getScheme() |
| + | |
| + | |
| + | |
URLutil.getSortkey = function ( url )
    -- Build a sort key that orders URLs by their domain hierarchy
    -- Precondition:
    --     url  -- string, with URL
    -- Postcondition:
    --     Returns string "///" followed by the lower-cased host labels
    --             in reversed order joined by ".", then
    --             " <port> <scheme>: <path>"; port is 0 if none was
    --             given, "#" in the path is replaced by a space.
    --             Example: "http://www.example.org:8080/a#b" yields
    --                      "///org.example.www 8080 http: a b"
    --     Returns url unchanged if it is not a string, contains no
    --             "//", or no scheme could be identified in front
    --             of the "//"
    local r = url
    if type( url ) == "string" then
        local i = url:find( "//", 1, true )
        if i then
            local scheme
            if i == 1 then
                -- Protocol-relative URL; string.find() is 1-based,
                -- a match at the very beginning is reported as 1.
                scheme = ""
            else
                scheme = url:match( "^%s*([a-zA-Z]*)://" )
            end
            if scheme then
                local s = url:sub( i + 2 )
                local port = 0
                local suffix = ""
                local comps, labels, m, site
                i = s:find( "/", 1, true )
                if i and i > 1 then
                    suffix = s:sub( i + 1 ):gsub( "#", " " )
                    s = s:sub( 1, i - 1 )
                end
                -- Capture the digits only, without the colon, and
                -- convert them, since "%d" below requires a number.
                site, m = s:match( "^(.+):(%d+)$" )
                if m then
                    port = tonumber( m )
                else
                    site = s
                end
                comps = mw.text.split( site:lower(), ".", true )
                labels = { }
                for k = #comps, 1, -1 do
                    labels[ #labels + 1 ] = comps[ k ]
                end -- for --k
                r = string.format( "///%s %d %s: %s",
                                   table.concat( labels, "." ),
                                   port,
                                   scheme:lower(),
                                   suffix )
            end
        end
    end
    return r
end -- URLutil.getSortkey()
| | | |
| | | |
Zeile 520: |
Zeile 609: |
| local r = URLutil.getHost( url ) | | local r = URLutil.getHost( url ) |
| if r then | | if r then |
− | r = mw.ustring.match( r, "[%w]+%.(%a[%w-]*%a)$" ) | + | r = mw.ustring.match( r, "%w+%.(%a[%w%-]*%a)$" ) |
| if not r then | | if not r then |
| r = false | | r = false |
Zeile 567: |
Zeile 656: |
| local r | | local r |
| if type( s ) == "string" then | | if type( s ) == "string" then |
− | local scan = "^%s*([%w%.%%_-]+%w)%.(%a[%w-]*%a)%s*$" | + | local scan = "^%s*([%w%.%%_-]*%w)%.(%a[%w-]*%a)%s*$" |
| local scope | | local scope |
| s, scope = mw.ustring.match( s, scan ) | | s, scope = mw.ustring.match( s, scan ) |
Zeile 629: |
Zeile 718: |
| return URLutil.isDomain( s ) or URLutil.isIP( s ) | | return URLutil.isDomain( s ) or URLutil.isIP( s ) |
| end -- URLutil.isHost() | | end -- URLutil.isHost() |
| + | |
| + | |
| + | |
URLutil.isHostPathResource = function ( s )
    -- Check for a resource URL that may lack the leading scheme
    -- Precondition:
    --     s  -- string, with URL or with host and path
    -- Postcondition:
    --     Returns the result of URLutil.isResourceURL() for s itself;
    --     if that is not truthy and s is present, the result for the
    --     trimmed s prefixed by "//"
    local found = URLutil.isResourceURL( s )
    if found or not s then
        return found
    end
    -- Second attempt: treat s as protocol-relative
    found = URLutil.isResourceURL( "//" .. mw.text.trim( s ) )
    return found
end -- URLutil.isHostPathResource()
| | | |
| | | |
Zeile 763: |
Zeile 862: |
| return false | | return false |
| end -- isProtocolAccepted() | | end -- isProtocolAccepted() |
− |
| |
− |
| |
− |
| |
− | URLutil.isProtocolMW = function ( prot )
| |
− | return isProtocolAccepted( prot,
| |
− | " http https ftp ftps ssh sftp irc ircs xmpp sip sips gopher telnet nntp worldwind mailto tel sms news svn git mms bitcoin magnet urn geo " )
| |
− | end -- URLutil.isProtocolMW()
| |
| | | |
| | | |
Zeile 796: |
Zeile 888: |
| local s1, s2 = url:match( "^([^#]+)(#.*)$" ) | | local s1, s2 = url:match( "^([^#]+)(#.*)$" ) |
| if s2 then | | if s2 then |
− | if url:match( "^%s*[a-zA-Z]*://(.+)/" ) then | + | if url:match( "^%s*[a-zA-Z]*:?//(.+)/" ) then |
| return true | | return true |
| end | | end |
Zeile 815: |
Zeile 907: |
| local s = URLutil.getAuthority( url ) | | local s = URLutil.getAuthority( url ) |
| local pat = "[%[|%]" .. | | local pat = "[%[|%]" .. |
− | mw.ustring.char( 8201, 45, 8207, | + | mw.ustring.char( 34, |
| + | 8201, 45, 8207, |
| 8234, 45, 8239, | | 8234, 45, 8239, |
| 8288 ) | | 8288 ) |
Zeile 848: |
Zeile 941: |
| URLutil.isWebURL = function ( url ) | | URLutil.isWebURL = function ( url ) |
| if URLutil.getScheme( url ) and URLutil.getAuthority( url ) then | | if URLutil.getScheme( url ) and URLutil.getAuthority( url ) then |
− | if not url:match( "%S%s+%S" ) then | + | if not url:find( "%S%s+%S" ) and |
| + | not url:find( "''", 1, true ) then |
| return true | | return true |
| end | | end |
Zeile 869: |
Zeile 963: |
| | | |
| | | |
Failsafe.failsafe = function ( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --     atleast  -- string, with required version
    --                         or "wikidata" or "~"
    --                 or false
    -- Postcondition:
    --     Returns  string  -- with queried version, also if problem
    --              false   -- if appropriate
    local latest   = ( atleast == "~" )
    local required = atleast
    local result
    if latest or atleast == "wikidata" then
        local qid = Failsafe.item
        local entity, claim
        -- In both registry modes no local minimum is checked later
        required = false
        if type( qid ) == "number" and qid > 0 then
            entity = mw.wikibase.getEntity( string.format( "Q%d", qid ) )
        end
        if type( entity ) == "table" then
            claim = entity:formatPropertyValues( Failsafe.serialProperty
                                                 or "P348" )
        end
        if type( claim ) == "table" and
           type( claim.value ) == "string" and
           claim.value ~= "" then
            if latest and claim.value == Failsafe.serial then
                -- "~": registered version equals the local one
                result = false
            else
                result = claim.value
            end
        end
    end
    if result == nil then
        -- Nothing retrieved from the item: answer from local serial
        if required and not ( required <= Failsafe.serial ) then
            result = false
        else
            result = Failsafe.serial
        end
    end
    return result
end -- Failsafe.failsafe()
| + | |
| + | |
| + | |
local function Template( frame, action, amount )
    -- Run actual code from template transclusion
    -- Precondition:
    --     frame   -- object, providing frame.args
    --     action  -- string, with name of function in URLutil
    --     amount  -- number, of unnamed args to collect; default 1
    -- Postcondition:
    --     Returns first result of URLutil[ action ]() called with up
    --     to three trimmed args, or nil if the first arg is missing
    --     or blank
    local args = { }
    for k = 1, ( amount or 1 ) do
        local raw  = frame.args[ k ]
        local text = raw and mw.text.trim( raw )
        -- Blank parameters are handled like absent ones
        if text and text ~= "" then
            args[ k ] = text
        end
    end -- for k
    if not args[ 1 ] then
        return nil
    end
    local r = URLutil[ action ]( args[ 1 ], args[ 2 ], args[ 3 ] )
    return r
end -- Template()
| + | |
| + | |
| | | |
| local p = {} | | local p = {} |
| | | |
do
    -- Template entry points decode(), encode(), getAuthority():
    -- each forwards the listed number of unnamed template parameters
    -- to the URLutil function of the same name and returns its
    -- result, or "" if there is none.
    local amounts = { decode       = 2,
                      encode       = 2,
                      getAuthority = 1 }
    for name, n in pairs( amounts ) do
        p[ name ] = function ( frame )
            local r = Template( frame, name, n )
            if not r then
                r = ""
            end
            return r
        end
    end -- for name, n
end
| function p.getFragment( frame ) | | function p.getFragment( frame ) |
− | local r = URLutil.getFragment( frame.args[ 1 ], frame.args[ 2 ] ) | + | local r = Template( frame, "getFragment", 2 ) |
| if r then | | if r then |
| r = "#" .. r | | r = "#" .. r |
Zeile 889: |
Zeile 1.057: |
| end | | end |
do
    -- Template entry points getHost() ... getPort():
    -- each forwards the first unnamed template parameter to the
    -- URLutil function of the same name and returns its result,
    -- or "" if there is none.
    local names = { "getHost",
                    "getLocation",
                    "getNormalized",
                    "getPath",
                    "getPort" }
    for _, name in ipairs( names ) do
        p[ name ] = function ( frame )
            local r = Template( frame, name )
            if not r then
                r = ""
            end
            return r
        end
    end -- for _, name
end
| function p.getQuery( frame ) | | function p.getQuery( frame ) |
− | local r | + | local r = Template( frame, "getQuery", 3 ) |
− | local key = frame.args[ 2 ] | + | if r then |
− | if key then
| + | local key = frame.args[ 2 ] |
− | key = mw.text.trim( key )
| + | if key then |
− | if key == "" then
| + | key = mw.text.trim( key ) |
− | key = nil
| + | if key == "" then |
| + | key = nil |
| + | end |
| end | | end |
− | end
| |
− | r = URLutil.getQuery( frame.args[ 1 ], key, frame.args[ 3 ] )
| |
− | if r then
| |
| if not key then | | if not key then |
| r = "?" .. r | | r = "?" .. r |
Zeile 923: |
Zeile 1.090: |
| end | | end |
do
    -- Template entry points getRelativePath() ... wikiEscapeURL().
    -- Every entry point forwards its unnamed template parameters via
    -- Template() to the URLutil function of the same name; they only
    -- differ in how the result is converted for the template.

    local function asText( r )
        -- result itself, or "" if there is none
        return r or ""
    end

    local function asFlag( r )
        -- "1" for any truthy result, otherwise ""
        return r and "1" or ""
    end

    local function asIs( r )
        -- result handed over without conversion
        return r
    end

    local function expose( names, finish, amount )
        -- names   -- sequence of function names
        -- finish  -- function, converting the result
        -- amount  -- number of parameters, if more than one
        for _, name in ipairs( names ) do
            p[ name ] = function ( frame )
                return finish( Template( frame, name, amount ) )
            end
        end -- for _, name
    end

    expose( { "getRelativePath",
              "getScheme",
              "getSortkey",
              "getTLD",
              "getTop2domain",
              "getTop3domain",
              "isIP" },
            asText )
    expose( { "isAuthority",
              "isDomain",
              "isDomainExample",
              "isDomainInt",
              "isHost",
              "isHostPathResource",
              "isIPlocal",
              "isIPv4",
              "isIPv6",
              "isMailAddress",
              "isMailLink",
              "isProtocolDialog",
              "isProtocolWiki",
              "isResourceURL",
              "isSuspiciousURL",
              "isWebURL" },
            asFlag )
    expose( { "isUnescapedURL" }, asFlag, 2 )
    expose( { "wikiEscapeURL" }, asIs )
end
p.failsafe = function ( frame )
    -- Versioning interface for templates and for other modules
    -- Precondition:
    --     frame  -- object, with required version as frame.args[ 1 ]
    --               or string, with required version
    --               anything else is handled like no requirement
    -- Postcondition:
    --     Returns result of Failsafe.failsafe(), or "" if that
    --     is false
    local since
    if type( frame ) == "table" then
        since = frame.args[ 1 ]
    elseif type( frame ) == "string" then
        since = frame
    end
    local wanted = since and mw.text.trim( since )
    if wanted == "" then
        -- A blank requirement is forwarded as false
        wanted = false
    end
    return Failsafe.failsafe( wanted ) or ""
end
| function p.URLutil() | | function p.URLutil() |