Module:Byte layout: Difference between revisions
Jump to navigation
Jump to search
(WIP: Parser for a simple data-structure DSL, because templates only let you pass in strings) |
(WIP: Add doc comments, parse unquoted values into numbers) |
||
| Line 1: | Line 1: | ||
local p = {} | local p = {} | ||
--[[ | |||
Parse a string of key-value pairs into a Lua table. | |||
Examples: | |||
parse_string("foo: 123, bar: 456") -- returns {foo=123, bar=456} | |||
parse_string("foo, bar: named value, baz") -- returns {"foo", bar="named value", "baz"} | |||
parse_string("foo: (A:B:C), bar: (1,2,3)") -- returns {foo="A:B:C", bar="1,2,3"} | |||
]] | |||
function p.parse_string(s) | function p.parse_string(s) | ||
local result = {} | local result = {} | ||
| Line 46: | Line 54: | ||
return result | return result | ||
end | end | ||
| Line 79: | Line 63: | ||
-- Attempt to consume an unquoted value, e.g., the "bar" in "foo: bar" | -- Attempt to consume an unquoted value, e.g., the "bar" in "foo: bar" | ||
function match_unquoted_value(s, init) | function match_unquoted_value(s, init) | ||
local value, i = match_pattern(s, "^(([^,:()]+)%s*)", init) | |||
local numeric = tonumber(value) | |||
if numeric then | |||
value = numeric | |||
end | |||
return value, i | |||
end | end | ||
| Line 97: | Line 86: | ||
function match_whitespace(s, init) | function match_whitespace(s, init) | ||
return match_pattern(s, "^((%s*))", init) | return match_pattern(s, "^((%s*))", init) | ||
end | |||
--[[ | |||
Attempts to "consume" a pattern from a string. Example: | |||
s = "foo bar baz" | |||
offset = 1 | |||
word, offset = match_pattern(s, "((foo)%s*)", offset) | |||
-- word = "foo", offset = 5 | |||
word, offset = match_pattern(s, "((bar)%s*)", offset) | |||
-- word = "bar", offset = 9 | |||
word, offset = match_pattern(s, "((whoops!)%s*)", offset) | |||
-- word = nil, offset = 9 | |||
-- (offset doesn't change on failure) | |||
Pattern must have two capture groups: the entire string, and the portion | |||
of interest (the thing that is returned along with the new offset). | |||
]] | |||
function match_pattern(s, pattern, init) | |||
full, content = string.match(s, pattern, init) | |||
if full == nil then | |||
return nil, init | |||
end | |||
return content, init + #full | |||
end | end | ||
return p | return p | ||
Latest revision as of 06:21, 8 February 2021
Documentation for this module may be created at Module:Byte layout/doc
-- Table of functions exported by this module; it is returned at the end of the file.
local p = {}
--[[
Turn a comma-separated list of values, each optionally prefixed with "key:",
into a Lua table.

Values that carry a key are stored under that key; values without one are
appended to the array part of the table in the order they appear. A value is
either unquoted text or text wrapped in parentheses (the parentheses are
removed by the quoted-value matcher).

Examples:
    parse_string("foo: 123, bar: 456")          -- returns {foo=123, bar=456}
    parse_string("foo, bar: named value, baz")  -- returns {"foo", bar="named value", "baz"}
    parse_string("foo: (A:B:C), bar: (1,2,3)")  -- returns {foo="A:B:C", bar="1,2,3"}

Raises an error when no value can be read at the current position, or when
something other than a comma follows a value before the end of the string.
]]
function p.parse_string(s)
    local parsed = {}

    -- Skip whatever whitespace precedes the first entry
    local _, pos = match_whitespace(s, 1)

    -- One iteration per "key: value" (or bare "value") entry
    while pos <= #s do
        -- The key is optional: it stays nil for positional values
        local key
        key, pos = match_key(s, pos)

        -- Try the unquoted form first and fall back to the parenthesised form
        local value
        value, pos = match_unquoted_value(s, pos)
        if value == nil then
            value, pos = match_quoted_value(s, pos)
            if value == nil then
                error("Parse error: expected a value at position " .. pos .. " of: " .. s)
            end
        end

        -- Keyed values go into the hash part, bare values are appended
        if key ~= nil then
            parsed[key] = value
        else
            table.insert(parsed, value)
        end

        -- Entries must be separated by a comma unless the input is exhausted
        local separator
        separator, pos = match_separator(s, pos)
        if separator == nil and pos <= #s then
            error("Parse error: expected comma at position " .. pos .. " of: " .. s)
        end
    end

    return parsed
end
-- Try to read a key name followed by a colon at offset init, e.g. the "foo"
-- in "foo: bar". A key starts with a letter or underscore and continues with
-- letters, digits or underscores; whitespace around the colon is consumed too.
-- Returns the key and the offset after the colon, or nil and init on failure.
function match_key(s, init)
    local key_pattern = "^(([%a_][%w_]*)%s*:%s*)"
    local key, next_offset = match_pattern(s, key_pattern, init)
    return key, next_offset
end
-- Attempt to consume an unquoted value, e.g., the "bar" in "foo: bar".
-- An unquoted value is a run of characters containing no comma, colon or
-- parenthesis. Trailing whitespace is consumed but not kept in the value, and
-- a value that tonumber() accepts is returned as a number instead of a string.
-- Returns the value and the offset just past it, or nil and init on failure.
function match_unquoted_value(s, init)
    local value, i = match_pattern(s, "^(([^,:()]+)%s*)", init)
    if value == nil then
        return nil, i
    end

    -- The character class above also matches whitespace, so the capture
    -- swallows any blanks in front of the next separator (the trailing %s*
    -- never gets to consume them). Trim them here; the offset already points
    -- past them, so it does not need adjusting.
    value = string.match(value, "^(.-)%s*$")

    local numeric = tonumber(value)
    if numeric ~= nil then
        return numeric, i
    end
    return value, i
end
-- Attempt to consume a quoted value, e.g., the ":::" in "dots: (:::)".
-- A quoted value is a balanced pair of parentheses and everything between
-- them; whitespace after the closing parenthesis is consumed as well.
-- Returns the text between the parentheses and the offset just past the
-- match, or nil and init when there is no quoted value at init.
function match_quoted_value(s, init)
    local quoted, i = match_pattern(s, "^((%b())%s*)", init)
    if quoted == nil then
        -- Nothing matched: report failure to the caller instead of passing
        -- nil to string.sub, which would raise a "bad argument" error.
        return nil, i
    end

    -- Drop the opening and closing parenthesis
    local unquoted = string.sub(quoted, 2, -2)
    return unquoted, i
end
-- Try to read the comma that separates two entries, e.g. the "," in
-- "foo: 1, bar: 2". Whitespace following the comma is consumed as well.
-- Returns "," and the offset after the separator, or nil and init on failure.
function match_separator(s, init)
    local separator_pattern = "^((,)%s*)"
    local separator, next_offset = match_pattern(s, separator_pattern, init)
    return separator, next_offset
end
-- Consume a (possibly empty) run of whitespace at offset init, e.g. the " "
-- in " foo:bar". Returns the whitespace that was matched and the offset of
-- the first character after it.
function match_whitespace(s, init)
    local whitespace_pattern = "^((%s*))"
    local whitespace, next_offset = match_pattern(s, whitespace_pattern, init)
    return whitespace, next_offset
end
--[[
Attempts to "consume" a pattern from a string, starting at offset init.

Example:
    s = "foo bar baz"
    offset = 1
    word, offset = match_pattern(s, "((foo)%s*)", offset)
    -- word = "foo", offset = 5
    word, offset = match_pattern(s, "((bar)%s*)", offset)
    -- word = "bar", offset = 9
    word, offset = match_pattern(s, "((whoops!)%s*)", offset)
    -- word = nil, offset = 9
    -- (offset doesn't change on failure)

Pattern must have two capture groups: the entire string, and the portion
of interest (the thing that is returned along with the new offset).
Anchor the pattern with "^" when the match has to begin exactly at init.

Returns the second capture and the offset of the first character after the
match, or nil and the unchanged init when the pattern does not match.
]]
function match_pattern(s, pattern, init)
    -- string.find reports where the match ends, so the new offset is correct
    -- even if an unanchored pattern matches somewhere after init. Everything
    -- is kept in locals so that no capture leaks into the global table.
    local first, last, _, content = string.find(s, pattern, init)
    if first == nil then
        return nil, init
    end
    return content, last + 1
end
-- Hand the table of exported functions (p) back to whoever loads this module.
return p