-- Module:String

--- String provides access to basic string functions.
--  This module allows simple text manipulation and is dozens of times
--  more efficient than its parser function counterparts.
--
--  MediaWiki's native string functions have an upper limit defined by
--  `wgPFStringLengthLimit` as 1000 characters - this module lacks any
--  such limit and is much more performant.
--
--  The majority of template functions provided can be invoked with named
--  parameters, unnamed parameters, or a mixture. If named parameters
--  are used, MediaWiki will automatically remove leading or trailing
--  whitespace from the parameter - see @{str._getParameters}.
--
--  Template options:
--   * `ignore_errors`: If set to `true` or `1`, any error condition will
--  result in an empty string being returned rather than an error
--  message.
--   * `error_category`: If an error occurs, specifies the name of a
--  category to include with the error message. The default category is
--  [[:Category:Errors reported by Module String]].
--   * `no_category`: If set to `true` or `1`, no category will be added
--  if an error is generated.
--
--  @module             str
--  @author             Dessamator
--  @credit             Anomie (Wikipedia)
--  @attribution        Wikipedia
--  @see                Wikipedia documentation
--  @see                Test suite for module

local str = {}

-- Module dependencies.
local ustring, text = mw.ustring, mw.text
local entrypoint = require( 'Dev:Entrypoint' )
local yesno = require( 'Dev:Yesno' )

-- Template functions.

--- Counts the Unicode characters in the target string.
--  @function           str.len
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param[opt]         {string} frame.args.s Target Unicode string; a
--                      missing value is treated as the empty string.
--  @return             {number} Length of Unicode string.
function str.len( frame )
    local params = str._getParameters( frame.args, { 's' } )
    local target = params.s or ''
    return ustring.len( target )
end

--- Extracts a substring of the target string at specified indices.
--  Indexing is **1-based** for the target string to extract from. If
--  either `i` or `j` is a negative value, it is interpreted the same as
--  selecting a character by counting from the end of the string.
--  Hence, a value of `-1` is the same as selecting the last character
--  of the string.
--
--  If the requested indices are out of range for the given string, an
--  error is reported.
--
--  @function           str.sub
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.s The string to return a
--                      subset of.
--  @param              {string} frame.args.i The first index of the
--                      substring to return; default: `1`.
--  @param              {string} frame.args.j The last index of the
--                      substring to return; default: `-1` (last character).
--  @error              {string} 'string subset index out of range'
--  @error              {string} 'string subset indices out of order'
--  @return             Substring from `i` to `j`, or the result of
--                      @{str._error} when the indices are invalid.
function str.sub( frame )
    local params = str._getParameters( frame.args, { 's', 'i', 'j' } )
    local subject = params.s or ''
    local first = tonumber( params.i ) or 1
    local last = tonumber( params.j ) or -1
    local size = ustring.len( subject )

    -- Translate negative indices into positive offsets so that the range
    -- checks below only have to deal with one representation.
    if first < 0 then
        first = size + first + 1
    end
    if last < 0 then
        last = size + last + 1
    end

    local out_of_range = first > size or last > size or first < 1 or last < 1
    if out_of_range then
        return str._error( 'string subset index out of range', frame )
    end

    if last < first then
        return str._error( 'string subset indices out of order', frame )
    end

    return ustring.sub( subject, first, last )
end

--- Implements the behaviour of a legacy substring template.
--  NOTE(review): the template name was lost from this comment; upstream
--  (Wikipedia) it is presumably `{{str sub old}}` - confirm.
--  This function is kept in order to maintain these older templates.
--  Indexing is **0-based** for the substring start position.
--
--  @function           str.sublength
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.s Source string to search.
--                      A missing value is treated as the empty string.
--  @param              {string} frame.args.i Index to begin output
--                      substring at. Default: `0`.
--  @param              {string} frame.args.len Length of output substring.
--                      When omitted, the substring runs to the end of `s`.
--  @return             {string} Substring starting with `i`/`0` of
--                      length `len`.
--  @warning            This function is deprecated in favor of @{str.sub}.
function str.sublength( frame )
    local args = str._getParameters( frame.args, { 's', 'i', 'len' } )
    -- Default to '' like the sibling functions, so a missing source yields
    -- an empty result instead of a script error inside ustring.sub.
    local source = args['s'] or ''
    local i = tonumber( args['i'] ) or 0
    local len = tonumber( args['len'] )

    -- `i` is 0-based, ustring.sub is 1-based. A nil end index means
    -- "through the end of the string".
    return ustring.sub( source, i + 1, len and ( i + len ) )
end

--- Extracts a substring matching a pattern from the source string.
--  If `match` or `start` are out of range for the string being queried,
--  then this function generates an error. An error is also generated if
--  no match is found. If one adds the parameter ignore_errors=true, then
--  the error will be suppressed and an empty string will be returned on
--  any failure.
--
--  @function           str.match
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.s Target string to search.
--  @param              {string} frame.args.pattern The pattern or string
--                      to find within the string.
--  @param              {string} frame.args.start The index within the
--                      source string to start the search. The first
--                      character of the string has index 1. A negative
--                      value counts from the end of the string.
--                      Default: `1`.
--  @param              {string} frame.args.match In some cases it may be
--                      possible to make multiple matches on a single
--                      string. This specifies which match to return,
--                      where the first match is `match = 1`. If a
--                      negative number is specified then a match is
--                      returned counting from the last match. Hence
--                      `match = -1` is the same as requesting the last
--                      match. Default: `1`.
--  @param              {string} frame.args.plain A flag indicating that
--                      the pattern should be understood as a literal.
--                      Default: `false`.
--  @param              {string} frame.args.nomatch If no match is found,
--                      output the "nomatch" value rather than an error.
--  @error              {string} 'target string is empty'
--  @error              {string} 'pattern string is empty'
--  @error              {string} 'requested start is out of range'
--  @error              {string} 'match index is out of range'
--  @error              {string} 'match not found'
--  @return             {string} Substring of the source string matching
--                      a pattern or string literal.
--  @see                Native patterns
--  @see                Unicode patterns
function str.match( frame )
    local args = str._getParameters( frame.args, { 's', 'pattern', 'start', 'match', 'plain', 'nomatch' } )
    local s = args['s'] or ''
    local start = tonumber( args['start'] ) or 1
    local plain_flag = str._getBoolean( args['plain'] or false )
    local pattern = args['pattern'] or ''
    local match_index = math.floor( tonumber( args['match'] ) or 1 )
    local nomatch = args['nomatch']

    if s == '' then
        return str._error( 'target string is empty', frame )
    end
    if pattern == '' then
        return str._error( 'pattern string is empty', frame )
    end
    if math.abs( start ) < 1 or math.abs( start ) > ustring.len( s ) then
        return str._error( 'requested start is out of range', frame )
    end
    if match_index == 0 then
        return str._error( 'match index is out of range', frame )
    end
    if plain_flag then
        pattern = str._escapePattern( pattern )
    end

    local result
    if match_index == 1 then
        -- Finding the first match is the simple case.
        result = ustring.match( s, pattern, start )
    else
        -- gmatch has no start offset, so trim the subject instead. Test
        -- `~= 1` rather than `> 1` so that a negative (from-the-end) start,
        -- which the range check above accepts, is honoured here just as it
        -- is in the first-match branch.
        if start ~= 1 then
            s = ustring.sub( s, start )
        end

        local iterator = ustring.gmatch( s, pattern )
        if match_index > 0 then
            -- Forward search: stop at the requested occurrence.
            for w in iterator do
                match_index = match_index - 1
                if match_index == 0 then
                    result = w
                    break
                end
            end
        else
            -- Reverse search: collect every match, then index from the end.
            local result_table = {}
            local count = 1
            for w in iterator do
                result_table[count] = w
                count = count + 1
            end

            -- `count` is one past the last match, so -1 selects the last.
            result = result_table[ count + match_index ]
        end
    end

    if result ~= nil then
        return result
    end
    if nomatch ~= nil then
        return nomatch
    end
    return str._error( 'match not found', frame )
end

--- Returns a single character from the target string.
--  Indexing is **1-based** for the target string position.
--
--  If one requests a negative value, this function will select a
--  character by counting backwards from the end of the string. In other
--  words, `pos = -1` is the same as asking for the last character.
--
--  A requested value of zero, or a value greater than the length of the
--  string returns an error.
--
--  @function           str.pos
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.target The string to search.
--  @param              {string} frame.args.pos The index for the character
--                      to return. Can be negative for reverse indexing.
--  @error              {string} 'string index out of range'
--  @return             {string} Single character at position `pos`.
function str.pos( frame )
    local params = str._getParameters( frame.args, { 'target', 'pos' } )
    local subject = params.target or ''
    local index = tonumber( params.pos ) or 0

    -- A missing or non-numeric position falls back to 0, which is
    -- rejected here together with indices beyond either end.
    local too_far = math.abs( index ) > ustring.len( subject )
    if index == 0 or too_far then
        return str._error( 'string index out of range', frame )
    end

    return ustring.sub( subject, index, index )
end

--- Searches for a target string/pattern in a string.
--
--  This function should be safe for UTF-8 strings.
--
--  @function           str.find
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source The string to search.
--  @param              {string} frame.args.target The string or pattern
--                      to find within the `source` string.
--  @param              {string} frame.args.start The index within the source
--                      string to start the search. Default: `1`.
--  @param              {string} frame.args.plain Boolean flag indicating
--                      that target should be understood as a literal and
--                      not as a Lua style regular expression. Default: `true`.
--  @return             First index >= `start`/`1` that `target` is found
--                      within `source`. Indexing is **1-based**. If `target`
--                      is not found, then this function returns 0. If
--                      either "source" or "target" are missing / empty, this
--                      function also returns 0.
function str.find( frame )
    local args = str._getParameters( frame.args, { 'source', 'target', 'start', 'plain' } )
    local source = args['source'] or ''
    local pattern = args['target'] or ''
    local start_pos = tonumber( args['start'] ) or 1

    -- Apply the default only when the argument is absent. `x or true`
    -- would also turn an explicit boolean `false` into `true`.
    local plain = args['plain']
    if plain == nil then
        plain = true
    end

    if source == '' or pattern == '' then
        return 0
    end

    plain = str._getBoolean( plain )

    -- Only the start index of the match is of interest.
    local start = ustring.find( source, pattern, start_pos, plain )

    return start or 0
end

--- Duplicates the behavior of a legacy find template, including its quirks.
--  NOTE(review): the template name was lost from this comment; presumably
--  `{{str_find}}`, going by the function name - confirm.
--  This is provided in order to support older templates.
--
--  @function           str.str_find
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source Source string to find a
--                      match in.
--  @param              {string} frame.args.target Search string within `source`.
--  @return             The first index in `source` that is a match to
--                      `target`. Indexing is **1-based**, and the function
--                      returns `-1` if the `target` string is not present
--                      in `source`.
--  @warning            This function is deprecated in favour of @{str.find}.
--  @note               If the "target" string is empty / missing, this
--                      function returns a value of `1`, which is
--                      generally unexpected behavior, and must be
--                      accounted for separately.
function str.str_find( frame )
    local params = str._getParameters( frame.args, { 'source', 'target' } )
    local needle = params.target or ''

    -- Legacy quirk: an empty needle reports a match at position 1.
    if needle == '' then
        return 1
    end

    local haystack = params.source or ''
    -- Literal (plain) search from the first character.
    local index = ustring.find( haystack, needle, 1, true )
    if index then
        return index
    end
    return -1
end

--- Determines the presence of a prefix in a string.
--  @function           str.prefix
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source Source string to test.
--  @param              {string} frame.args.prefix Prefix to test for.
--  @return             {string} `'yes'` when `source` starts with
--                      `prefix`, otherwise `'no'`.
function str.prefix( frame )
    local params = str._getParameters( frame.args, { 'source', 'prefix' } )
    local prefix = params.prefix or ''

    -- All strings begin with the empty string.
    if prefix == '' then
        return 'yes'
    end

    local source = params.source or ''
    -- Compare the leading characters of the source against the prefix.
    local head = ustring.sub( source, 1, ustring.len( prefix ) )
    if head == prefix then
        return 'yes'
    end
    return 'no'
end

--- Determines the presence of a suffix in a string.
--  @function           str.suffix
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source Source string to test.
--  @param              {string} frame.args.suffix Suffix to test for.
--  @return             {string} `'yes'` when `source` ends with
--                      `suffix`, otherwise `'no'`.
function str.suffix( frame )
    local params = str._getParameters( frame.args, { 'source', 'suffix' } )
    local suffix = params.suffix or ''

    -- All strings end with the empty string.
    if suffix == '' then
        return 'yes'
    end

    local source = params.source or ''
    -- Negative indices select the trailing characters of the source.
    local tail = ustring.sub( source, -ustring.len( suffix ), -1 )
    if tail == suffix then
        return 'yes'
    end
    return 'no'
end

--- Counts the number of occurrences of one string in another.
--  @function           str.count
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source Source string to count
--                      occurrences in.
--  @param              {string} frame.args.pattern Lua pattern or string
--                      to match against.
--  @param[opt]         {string} frame.args.plain Boolean flag indicating
--                      that pattern should be understood as a literal
--                      and not as a Lua style regular expression.
--                      Default: `'true'`.
--  @return             {number} Number of occurrences in target string.
function str.count( frame )
    local params = str._getParameters( frame.args, { 'source', 'pattern', 'plain' } )
    local haystack = params.source or ''
    local needle = params.pattern or ''

    local literal = str._getBoolean( params.plain or 'true' )
    if literal then
        needle = str._escapePattern( needle )
    end

    -- gsub reports the number of substitutions as its second result;
    -- the rewritten string itself is discarded.
    local _, occurrences = ustring.gsub( haystack, needle, '' )
    return occurrences
end

--- Replaces a target string or pattern within another string.
--
--  @function           str.replace
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source The string to search.
--  @param              {string} frame.args.pattern The string or pattern
--                      to find within the source.
--  @param              {string} frame.args.replace The replacement text.
--  @param              {string} frame.args.count The number of occurrences
--                      to replace. Defaults to all occurrences.
--  @param              {string} frame.args.plain Boolean flag indicating
--                      that pattern should be understood as a literal
--                      and not as a Lua style regular expression.
--                      Default: `'true'`.
--  @return             {string} `source` with the requested replacements
--                      applied. `source` is returned unchanged when
--                      either `source` or `pattern` is empty.
function str.replace( frame )
    local args = str._getParameters( frame.args, { 'source', 'pattern', 'replace', 'count', 'plain' } )
    local source = args['source'] or ''
    local pattern = args['pattern'] or ''
    local replace = args['replace'] or ''
    local count = tonumber( args['count'] )

    -- Apply the default only when the argument is absent. `x or true`
    -- would also turn an explicit boolean `false` into `true`.
    local plain = args['plain']
    if plain == nil then
        plain = true
    end

    -- Nothing to do: hand the source back untouched. (This used to return
    -- an undefined `source_str`, i.e. nil.)
    if source == '' or pattern == '' then
        return source
    end
    plain = str._getBoolean( plain )

    if plain then
        pattern = str._escapePattern( pattern )
        -- Only need to escape replacement sequences.
        replace = ustring.gsub( replace, "%%", "%%%%" )
    end

    local result

    if count ~= nil then
        result = ustring.gsub( source, pattern, replace, count )
    else
        result = ustring.gsub( source, pattern, replace )
    end

    -- Assigning to a single local drops gsub's substitution count.
    return result
end

--- Repeats a string a given number of times.
--  A simple template pipe for the `string.rep` Lua function.
--  @function           str.rep
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source Source string to repeat.
--  @param              {string} frame.args.count Integer for number of
--                      output repetitions.
--  @return             {string} String with repeated copies of `source`.
--  @error              {string} 'function rep expects a number as second
--                      parameter, received $count'
function str.rep( frame )
    local params = str._getParameters( frame.args, { 'source', 'count' } )
    local times = tonumber( params.count )

    if times == nil then
        -- Echo the offending value back to the editor in the message.
        local received = params.count or 'nil'
        return str._error(
            'function rep expects a number as second parameter, received "' .. received .. '"',
            frame
        )
    end

    return ustring.rep( params.source or '', times )
end

--- Convert string to lowercase Unicode character sequence.
--  @function           str.lc
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source Source string to change
--                      case. May also be given as the first unnamed
--                      argument. A missing value is treated as the empty
--                      string.
--  @return             Lowercase UTF-8 string.
function str.lc( frame )
    -- Fall back to '' so a missing argument yields an empty result
    -- instead of passing nil to ustring.lower.
    local source = frame.args['source'] or frame.args[1] or ''
    return ustring.lower( source )
end

--- Convert string to uppercase Unicode character sequence.
--  @function           str.uc
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source String of indeterminate
--                      case. May also be given as the first unnamed
--                      argument. A missing value is treated as the empty
--                      string.
--  @return             Uppercase UTF-8 string.
function str.uc( frame )
    -- Fall back to '' so a missing argument yields an empty result
    -- instead of passing nil to ustring.upper.
    local source = frame.args['source'] or frame.args[1] or ''
    return ustring.upper( source )
end

--- Convert string prefix to lowercase Unicode character.
--  @function           str.lcfirst
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source String of indeterminate
--                      case. May also be given as the first unnamed
--                      argument. A missing value is treated as the empty
--                      string.
--  @return             UTF-8 string with lowercase prefix letter.
function str.lcfirst( frame )
    -- Fall back to '' so a missing argument yields an empty result
    -- instead of passing nil to ustring.gsub.
    local source = frame.args['source'] or frame.args[1] or ''
    -- Only a leading uppercase letter is rewritten; the surrounding
    -- parentheses drop gsub's substitution count.
    return ( ustring.gsub( source, '^%u', ustring.lower ) )
end

--- Convert string prefix to uppercase Unicode character.
--  @function           str.ucfirst
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source Source string to change
--                      case. May also be given as the first unnamed
--                      argument. A missing value is treated as the empty
--                      string.
--  @return             UTF-8 string with uppercase prefix letter.
function str.ucfirst( frame )
    -- Fall back to '' so a missing argument yields an empty result
    -- instead of passing nil to ustring.gsub.
    local source = frame.args['source'] or frame.args[1] or ''
    -- Only a leading lowercase letter is rewritten; the surrounding
    -- parentheses drop gsub's substitution count.
    return ( ustring.gsub( source, '^%l', ustring.upper ) )
end

--- Pads beginning of a string with a character or whitespace.
--  The source length and the padding character are measured in Unicode
--  characters rather than bytes.
--  @function           str.padleft
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source Source string to pad.
--  @param              {string} frame.args.len Length of output string.
--                      Default: `0` (no padding).
--  @param[opt]         {string} frame.args.char Start padding character;
--                      only its first character is used.
--                      Default: `' '`.
--  @return             {string} String padded to the left.
function str.padleft( frame )
    local args = str._getParameters( frame.args, { 'source', 'len', 'char' } )
    local source = args['source'] or ''
    local len = tonumber( args['len'] ) or 0

    -- Take the first *character*, not the first byte: a byte-wise cut
    -- would split a multi-byte pad character into invalid UTF-8.
    local char = ustring.sub( args['char'] or ' ', 1, 1 )

    -- Count characters so multi-byte sources are padded to the requested
    -- visible width. ustring.len yields nil for invalid UTF-8, in which
    -- case the byte length is used as a fallback.
    local width = ustring.len( source ) or #source

    -- string.rep returns '' for non-positive counts, so sources that are
    -- already long enough come back unchanged.
    return string.rep( char, len - width ) .. source
end

--- Pads end of a string with a character or whitespace.
--  The source length and the padding character are measured in Unicode
--  characters rather than bytes.
--  @function           str.padright
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source Source string to pad.
--  @param              {string} frame.args.len Length of output string.
--                      Default: `0` (no padding).
--  @param[opt]         {string} frame.args.char End padding character;
--                      only its first character is used.
--                      Default: `' '`.
--  @return             {string} String padded to the right.
function str.padright( frame )
    local args = str._getParameters( frame.args, { 'source', 'len', 'char' } )
    local source = args['source'] or ''
    local len = tonumber( args['len'] ) or 0

    -- Take the first *character*, not the first byte: a byte-wise cut
    -- would split a multi-byte pad character into invalid UTF-8.
    local char = ustring.sub( args['char'] or ' ', 1, 1 )

    -- Count characters so multi-byte sources are padded to the requested
    -- visible width. ustring.len yields nil for invalid UTF-8, in which
    -- case the byte length is used as a fallback.
    local width = ustring.len( source ) or #source

    -- string.rep returns '' for non-positive counts, so sources that are
    -- already long enough come back unchanged.
    return source .. string.rep( char, len - width )
end

--- Return delimited string piece, like PHP's `explode`.
--  Indexing is **0-based**.
--  NOTE(review): the original comment said this matches a parser function
--  whose name was lost; presumably `{{#explode:}}` - confirm.
--
--  @function           str.explode
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source Delimited string to split.
--  @param              {string} frame.args.dlm Symbol or character to split
--                      with. Treated as a literal. Default: `' '`.
--  @param[opt]         {string} frame.args.pos Position of the piece to
--                      return. Negative values count back from the last
--                      piece (`-1` is the last piece). Missing or
--                      non-numeric values are treated as `0`.
--  @param[opt]         {string} frame.args.lim Maximum number of pieces.
--                      When the split yields more, the surplus pieces are
--                      joined (delimiters included) into the last one.
--                      Default: no limit. Values below `1` are ignored.
--  @return             {string} The requested piece, or `''` when `pos`
--                      does not address an existing piece.
function str.explode( frame )
    local args = str._getParameters( frame.args, { 'source', 'dlm', 'pos', 'lim' } )
    local source = args['source'] or ''
    local raw_delim = args['dlm'] or ' '
    -- text.split takes a pattern, so escape the delimiter to keep it literal.
    local delim = str._escapePattern( raw_delim )

    -- Convert the 0-based template index to Lua's 1-based indexing. The
    -- `or 0` guards against non-numeric input, which previously raised an
    -- arithmetic-on-nil error.
    local pos = ( tonumber( args['pos'] ) or 0 ) + 1

    local pieces = text.split( source, delim )

    local limit = tonumber( args['lim'] )
    if limit then
        limit = math.floor( limit )
    end

    -- Fold everything from piece `limit` onwards back into one piece.
    -- Every separator is the literal delimiter, so re-joining with
    -- `raw_delim` reproduces the original tail of the string. Limits below
    -- 1 are skipped: they would index a non-existent piece.
    if limit and limit >= 1 and limit < #pieces then
        pieces[limit] = table.concat( pieces, raw_delim, limit, #pieces )
        for index = #pieces, limit + 1, -1 do
            pieces[index] = nil
        end
    end

    -- Non-positive positions wrap around from the end of the list.
    if pos < 1 then
        pos = #pieces + pos
    end

    return pieces[pos] or ''
end

--- Percent-encoding for strings.
--  @function           str.urlencode
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source Source string to encode.
--                      A missing value is treated as the empty string.
--  @param              {string} frame.args.code Encoding type (`QUERY`,
--                      `PATH`, `WIKI`). Default: `QUERY`, also used when
--                      the argument is empty.
--  @return             {string} Percent-encoded string.
function str.urlencode( frame )
    local args = str._getParameters( frame.args, { 'source', 'code' } )

    -- Resolve the encoding type once and actually pass it on; the default
    -- used to be computed and then ignored.
    local code = args['code']
    if code == nil or code == '' then
        code = 'QUERY'
    end

    return mw.uri.encode( args['source'] or '', code )
end

--- URL decoding for strings.
--  @function           str.urldecode
--  @param              {table} frame Invocation frame object.
--  @param              {table} frame.args Invocation/template arguments.
--  @param              {string} frame.args.source Source string to decode.
--                      A missing value is treated as the empty string.
--  @param              {string} frame.args.code Encoding type (`QUERY`,
--                      `PATH`, `WIKI`). Default: `QUERY`, also used when
--                      the argument is empty.
--  @return             {string} Percent-decoded string.
function str.urldecode( frame )
    local args = str._getParameters( frame.args, { 'source', 'code' } )

    -- Resolve the encoding type once and actually pass it on; the default
    -- used to be computed and then ignored.
    local code = args['code']
    if code == nil or code == '' then
        code = 'QUERY'
    end

    return mw.uri.decode( args['source'] or '', code )
end

--- Helper functions.
--  @section            str.utils

--- Populates an argument list with both named/unnamed parameters.
--  This is relevant because named parameters are not identical to unnamed
--  parameters due to string trimming, and when dealing with strings we
--  sometimes want to either preserve or remove that whitespace
--  depending on the application.
--
--  Each name in `arg_list` is looked up by name first. When no named
--  value exists, the next unused positional argument is consumed instead.
--  @function           str._getParameters
--  @param              {table} frame_args Table of sequential and named arguments.
--  @param              {table} arg_list Array of parameter names.
--  @return             {table} Map of named arguments corresponding to `arg_list`.
function str._getParameters( frame_args, arg_list )
    local resolved = {}
    -- Index of the next positional argument that has not been handed out.
    local next_positional = 1

    for _, name in ipairs( arg_list ) do
        local value = frame_args[name]
        if value == nil then
            value = frame_args[next_positional]
            next_positional = next_positional + 1
        end
        resolved[name] = value
    end

    return resolved
end

--- Helper function to handle error messages.
--  Honours the `ignore_errors`, `error_category` and `no_category`
--  template options read from `frame.args`.
--  @function           str._error
--  @param              {string} exception Error string to display to user.
--  @param              {table} frame Current frame object (from string template or module).
--  @return             {string} Empty string when errors are ignored,
--                      otherwise the error message, prefixed with a
--                      category link unless categorisation is disabled.
function str._error( exception, frame )
    local category = frame.args['error_category'] or 'Errors reported by Module String'
    local silent = frame.args['ignore_errors'] or false
    local anonymous = frame.args['no_category'] or false

    if str._getBoolean( silent ) then
        return ''
    end

    -- NOTE(review): the HTML wrapper and the category link below were
    -- stripped from this file (only ' String Module Error: ' and an empty
    -- '' prefix remained). They are restored to match the upstream
    -- Wikipedia module - confirm against the wiki's live copy.
    local message = '<strong class="error">String Module Error: ' .. exception .. '</strong>'

    -- An empty `error_category` disables categorisation as well.
    if #category > 0 and not str._getBoolean( anonymous ) then
        message = '[[Category:' .. category .. ']]' .. message
    end

    return message
end

--- Helper function to interpret boolean strings.
--  Delegates to the `Dev:Yesno` helper with `true` as its second argument
--  and coerces any falsy outcome to `false`.
--  @function           str._getBoolean
--  @param              {string} str Boolean-like wikitext string.
--  @return             {boolean} Boolean value corresponding to `str`.
function str._getBoolean( str )
    local flag = yesno( str, true )
    if flag then
        return flag
    end
    return false
end

--- Helper function that escapes all pattern characters.
--  This allows patterns to be treated as plain text.
--  @function           str._escapePattern
--  @param              {string} pattern_str Lua pattern string with special characters.
--  @return             {string} Escaped Lua pattern string for literal string matches.
function str._escapePattern( pattern_str )
    -- Prefix every magic character with `%`. The outer parentheses drop
    -- gsub's second result (the substitution count), so that it cannot
    -- leak into a caller's argument list when this call is used as the
    -- last argument of another call.
    return ( ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ) )
end

--- Wrapper function for string template.
--  Produced by handing the module table to the `Dev:Entrypoint` helper.
--  @function           str.main
str.main = entrypoint( str )

return str