Module:Byte layout

From Wiki of ZZT
Revision as of 05:47, 8 February 2021 by Quantum (talk | contribs) (WIP: Parser for a simple data-structure DSL, because templates only let you pass in strings)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Documentation for this module may be created at Module:Byte layout/doc

local p = {}

-- Parse a comma-separated list of entries into a table.
--
-- Each entry is either a bare value or a "key: value" pair. A value is
-- either unquoted text or text wrapped in parentheses. Keyed entries are
-- stored under their key; entries without a key are appended in order to
-- the array part of the returned table.
--
-- Raises an error when a value or a separating comma is expected but the
-- corresponding helper reports no match.
function p.parse_string(s)
    local parsed = {}

    -- Skip whitespace in front of the first entry; only the offset matters.
    local _, pos = match_whitespace(s, 1)

    while pos <= #s do
        -- The "name:" prefix is optional; name stays nil for bare values.
        local name
        name, pos = match_key(s, pos)

        -- Try an unquoted value first, then fall back to a parenthesised one.
        local val
        val, pos = match_unquoted_value(s, pos)
        if val == nil then
            val, pos = match_quoted_value(s, pos)
            if val == nil then
                error("Parse error: expected a value at position " .. pos .. " of: " .. s)
            end
        end

        -- Keyed entries go into the hash part, bare entries are appended.
        if name ~= nil then
            parsed[name] = val
        else
            parsed[#parsed + 1] = val
        end

        -- Entries must be separated by a comma unless the input is exhausted.
        local comma
        comma, pos = match_separator(s, pos)
        if comma == nil and pos <= #s then
            error("Parse error: expected comma at position " .. pos .. " of: " .. s)
        end
    end

    return parsed
end

--[[
    Attempts to "consume" a pattern from a string. Example:

    s = "foo bar baz"
    offset = 1
    word, offset = match_pattern(s, "((foo)%s*)", offset)
    -- word = "foo", offset = 5
    word, offset = match_pattern(s, "((bar)%s*)", offset)
    -- word = "bar", offset = 9
    word, offset = match_pattern(s, "((whoops!)%s*)", offset)
    -- word = nil, offset = 9
    -- (offset doesn't change on failure)

    Pattern must have two capture groups: the entire matched text, and the
    portion of interest (the thing that is returned along with the new
    offset).

    The new offset is computed as init plus the length of the first capture,
    so the pattern should be anchored with "^" (or otherwise guaranteed to
    match starting exactly at init) for the returned offset to be meaningful.

    Parameters:
        s       - string to match against
        pattern - Lua pattern with the two captures described above
        init    - 1-based offset at which matching starts
    Returns:
        content, new_offset on success; nil, init on failure.
]]
function match_pattern(s, pattern, init)
    -- Keep the captures local so calls do not write to global variables.
    local full, content = string.match(s, pattern, init)
    if full == nil then
        return nil, init
    end
    return content, init + #full
end

-- Try to consume a key prefix such as the "foo" in "foo: bar".
-- A key is a letter or underscore followed by letters, digits or
-- underscores, then a colon; whitespace around the colon is consumed too.
-- Returns the key name and the offset after the prefix, or nil and the
-- unchanged offset when there is no key at init.
function match_key(s, init)
    local key_pattern = "^(([%a_][%w_]*)%s*:%s*)"
    local name, next_offset = match_pattern(s, key_pattern, init)
    return name, next_offset
end

-- Attempt to consume an unquoted value, e.g., the "bar" in "foo: bar".
--
-- An unquoted value is a run of characters containing no comma, colon or
-- parenthesis. The captured value is required to end on a non-whitespace
-- character, so whitespace between the value and the following delimiter
-- is consumed but not returned ("bar , baz" yields "bar", not "bar ").
-- Interior whitespace is kept ("hello world" stays intact).
--
-- Returns the value and the offset after it (including the consumed
-- trailing whitespace), or nil and the unchanged offset when no value
-- containing at least one non-whitespace character starts at init.
function match_unquoted_value(s, init)
    -- "[^,:()]*" is greedy and also matches whitespace; forcing the capture
    -- to end with "[^,:()%s]" makes it back off to the last non-space
    -- character so that the trailing "%s*" actually absorbs the padding.
    return match_pattern(s, "^(([^,:()]*[^,:()%s])%s*)", init)
end

-- Attempt to consume a quoted value, e.g., the ":::" in "dots: (:::)".
--
-- A quoted value is a balanced pair of parentheses and everything between
-- them; whitespace following the closing parenthesis is consumed as well.
-- Returns the text between the outer parentheses and the offset after the
-- match, or nil and the unchanged offset when no balanced parenthesised
-- value starts at init.
function match_quoted_value(s, init)
    local quoted, i = match_pattern(s, "^((%b())%s*)", init)
    if quoted == nil then
        -- No match: report failure instead of passing nil to string.sub,
        -- which would raise a bad-argument error.
        return nil, i
    end
    -- Drop the surrounding "(" and ")".
    local unquoted = string.sub(quoted, 2, -2)
    return unquoted, i
end

-- Try to consume the comma between entries, e.g., the "," in
-- "foo: 1, bar: 2", along with any whitespace that follows it.
-- Returns "," and the offset after the separator, or nil and the
-- unchanged offset when there is no comma at init.
function match_separator(s, init)
    local comma_pattern = "^((,)%s*)"
    local comma, next_offset = match_pattern(s, comma_pattern, init)
    return comma, next_offset
end

-- Consume a (possibly empty) run of whitespace starting at init, e.g.,
-- the "  " in "  foo:bar".
-- Returns the whitespace that was consumed and the offset just after it.
function match_whitespace(s, init)
    local blank_pattern = "^((%s*))"
    local blanks, next_offset = match_pattern(s, blank_pattern, init)
    return blanks, next_offset
end

return p