Module:Byte layout: Difference between revisions
Jump to navigation
Jump to search
(WIP: Parser for a simple data-structure DSL, because templates only let you pass in strings) |
(WIP: Add doc comments, parse unquoted values into numbers) |
||
Line 1: | Line 1: | ||
local p = {} | local p = {} | ||
--[[ | |||
Parse a string of key-value pairs into a Lua table. | |||
Examples: | |||
parse_string("foo: 123, bar: 456") -- returns {foo=123, bar=456} | |||
parse_string("foo, bar: named value, baz") -- returns {"foo", bar="named value", "baz"} | |||
parse_string("foo: (A:B:C), bar: (1,2,3)") -- returns {foo="A:B:C", bar="1,2,3"} | |||
]] | |||
function p.parse_string(s) | function p.parse_string(s) | ||
local result = {} | local result = {} | ||
Line 46: | Line 54: | ||
return result | return result | ||
end | end | ||
Line 79: | Line 63: | ||
-- Attempt to consume an unquoted value, e.g., the "bar" in "foo: bar" | -- Attempt to consume an unquoted value, e.g., the "bar" in "foo: bar" | ||
function match_unquoted_value(s, init) | function match_unquoted_value(s, init) | ||
local value, i = match_pattern(s, "^(([^,:()]+)%s*)", init) | |||
local numeric = tonumber(value) | |||
if numeric then | |||
value = numeric | |||
end | |||
return value, i | |||
end | end | ||
Line 97: | Line 86: | ||
function match_whitespace(s, init) | function match_whitespace(s, init) | ||
return match_pattern(s, "^((%s*))", init) | return match_pattern(s, "^((%s*))", init) | ||
end | |||
--[[ | |||
Attempts to "consume" a pattern from a string. Example: | |||
s = "foo bar baz" | |||
offset = 1 | |||
word, offset = match_pattern(s, "((foo)%s*)", offset) | |||
-- word = "foo", offset = 5 | |||
word, offset = match_pattern(s, "((bar)%s*)", offset) | |||
-- word = "bar", offset = 9 | |||
word, offset = match_pattern(s, "((whoops!)%s*)", offset) | |||
-- word = nil, offset = 9 | |||
-- (offset doesn't change on failure) | |||
Pattern must have two capture groups: the entire string, and the portion | |||
of interest (the thing that is returned along with the new offset). | |||
]] | |||
function match_pattern(s, pattern, init) | |||
full, content = string.match(s, pattern, init) | |||
if full == nil then | |||
return nil, init | |||
end | |||
return content, init + #full | |||
end | end | ||
return p | return p |
Latest revision as of 06:21, 8 February 2021
Documentation for this module may be created at Module:Byte layout/doc
local p = {}

--[[
Attempts to "consume" a pattern from a string, starting at offset `init`.

Example:
    s = "foo bar baz"
    offset = 1
    word, offset = match_pattern(s, "^((foo)%s*)", offset)
    -- word = "foo", offset = 5
    word, offset = match_pattern(s, "^((bar)%s*)", offset)
    -- word = "bar", offset = 9
    word, offset = match_pattern(s, "^((whoops!)%s*)", offset)
    -- word = nil, offset = 9
    -- (offset doesn't change on failure)

The pattern must have two capture groups: the entire consumed text, and the
portion of interest (the thing that is returned along with the new offset).
The pattern should be anchored with "^": the new offset is computed as
init + #full, which is only correct when the match begins exactly at `init`.

Returns: content (or nil if there was no match), new offset.
]]
local function match_pattern(s, pattern, init)
    -- Keep these local; bare assignment would create globals.
    local full, content = string.match(s, pattern, init)
    if full == nil then
        return nil, init
    end
    return content, init + #full
end

-- Attempt to consume a key name, e.g., the "foo" in "foo: bar".
-- Also consumes the colon and any whitespace around it.
local function match_key(s, init)
    return match_pattern(s, "^(([%a_][%w_]*)%s*:%s*)", init)
end

-- Attempt to consume an unquoted value, e.g., the "bar" in "foo: bar".
-- Values that tonumber() accepts are returned as numbers; everything else
-- is returned as a string with trailing whitespace removed.
local function match_unquoted_value(s, init)
    local value, i = match_pattern(s, "^(([^,:()]+)%s*)", init)
    if value == nil then
        return nil, i
    end
    -- The character class is greedy and also matches whitespace, so the
    -- capture can end in spaces (e.g. "bar " in "foo: bar , baz"). Trim it.
    value = string.match(value, "^(.-)%s*$")
    local numeric = tonumber(value)
    if numeric then
        value = numeric
    end
    return value, i
end

-- Attempt to consume a quoted value, e.g., the ":::" in "dots: (:::)".
-- The surrounding parentheses are stripped; the content is always a string.
-- Returns nil and the unchanged offset when there is no balanced
-- parenthesised group at `init`.
local function match_quoted_value(s, init)
    local quoted, i = match_pattern(s, "^((%b())%s*)", init)
    if quoted == nil then
        -- Let the caller report a proper parse error instead of failing
        -- inside string.sub with a nil argument.
        return nil, i
    end
    return string.sub(quoted, 2, -2), i
end

-- Attempt to match a comma, e.g., the "," in "foo: 1, bar: 2".
local function match_separator(s, init)
    return match_pattern(s, "^((,)%s*)", init)
end

-- Attempt to match leading whitespace, e.g., the " " in " foo:bar".
-- Always succeeds (possibly consuming nothing).
local function match_whitespace(s, init)
    return match_pattern(s, "^((%s*))", init)
end

--[[
Parse a string of key-value pairs into a Lua table.

Entries are separated by commas. An entry is either "key: value" (stored
under the string key) or a bare value (appended to the array part). Values
wrapped in parentheses may contain commas and colons and are kept as strings;
unquoted values are converted to numbers when possible.

Examples:
    parse_string("foo: 123, bar: 456")           -- returns {foo=123, bar=456}
    parse_string("foo, bar: named value, baz")   -- returns {"foo", bar="named value", "baz"}
    parse_string("foo: (A:B:C), bar: (1,2,3)")   -- returns {foo="A:B:C", bar="1,2,3"}

Raises an error (message starting with "Parse error:") when a value or a
separating comma is missing.
]]
function p.parse_string(s)
    local result = {}

    -- Strip leading whitespace
    local i = 1
    local token
    token, i = match_whitespace(s, i)

    -- Parse each key-value pair
    while i <= #s do
        -- Optional key; nil when the entry is a bare value
        local key
        key, i = match_key(s, i)

        -- Value: try the unquoted form first, then the parenthesised form
        local value
        value, i = match_unquoted_value(s, i)
        if value == nil then
            value, i = match_quoted_value(s, i)
        end
        if value == nil then
            error("Parse error: expected a value at position " .. i .. " of: " .. s)
        end

        -- Save key:value pair
        -- No key means append to end of table
        if key == nil then
            table.insert(result, value)
        else
            result[key] = value
        end

        -- Consume separator between key-value pairs; a missing separator
        -- is only acceptable at the very end of the input.
        token, i = match_separator(s, i)
        if token == nil and i <= #s then
            error("Parse error: expected comma at position " .. i .. " of: " .. s)
        end
    end

    return result
end

return p