Add more functionality to the Erlang parser (#2390)

* Erlang parser: support for empty lists
* Erlang parser: add support for single-quoted strings
* Erlang parser: support for comments
---------
Signed-off-by: Laurent Goderre <laurent.goderre@docker.com>
This commit is contained in:
Laurent Goderre 2023-12-22 09:45:20 -05:00 committed by GitHub
parent 63e7a004cb
commit 3a6b6562d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 88 additions and 1 deletions

View File

@ -2,6 +2,7 @@ package erlang
import ( import (
"bytes" "bytes"
"errors"
"fmt" "fmt"
"io" "io"
"strings" "strings"
@ -12,6 +13,8 @@ type erlangNode struct {
value interface{} value interface{}
} }
var errSkipComments = errors.New("")
func (e erlangNode) Slice() []erlangNode { func (e erlangNode) Slice() []erlangNode {
out, ok := e.value.([]erlangNode) out, ok := e.value.([]erlangNode)
if ok { if ok {
@ -56,6 +59,10 @@ func parseErlang(reader io.Reader) (erlangNode, error) {
i := 0 i := 0
for i < len(data) { for i < len(data) {
item, err := parseErlangBlock(data, &i) item, err := parseErlangBlock(data, &i)
if err == errSkipComments {
skipWhitespace(data, &i)
continue
}
if err != nil { if err != nil {
return node(nil), fmt.Errorf("%w\n%s", err, printError(data, i)) return node(nil), fmt.Errorf("%w\n%s", err, printError(data, i))
} }
@ -141,11 +148,26 @@ func parseErlangNode(data []byte, i *int) (erlangNode, error) {
c := data[*i] c := data[*i]
switch c { switch c {
case '[', '{': case '[', '{':
offset := *i + 1
skipWhitespace(data, &offset)
c2 := data[offset]
// Add support for empty lists
if (c == '[' && c2 == ']') || (c == '{' && c2 == '}') {
*i = offset + 1
return node(nil), nil
}
return parseErlangList(data, i) return parseErlangList(data, i)
case '"': case '"':
fallthrough
case '\'':
return parseErlangString(data, i) return parseErlangString(data, i)
case '<': case '<':
return parseErlangAngleString(data, i) return parseErlangAngleString(data, i)
case '%':
parseErlangComment(data, i)
return node(nil), errSkipComments
} }
if isLiteral(c) { if isLiteral(c) {
@ -205,7 +227,7 @@ func parseErlangString(data []byte, i *int) (erlangNode, error) {
buf.WriteByte(c) buf.WriteByte(c)
*i++ *i++
} }
return node(buf.String()), nil return node(nil), fmt.Errorf("unterminated string at %d", *i)
} }
func parseErlangList(data []byte, i *int) (erlangNode, error) { func parseErlangList(data []byte, i *int) (erlangNode, error) {
@ -216,6 +238,10 @@ func parseErlangList(data []byte, i *int) (erlangNode, error) {
for *i < len(data) { for *i < len(data) {
item, err := parseErlangNode(data, i) item, err := parseErlangNode(data, i)
if err != nil { if err != nil {
if err == errSkipComments {
skipWhitespace(data, i)
continue
}
return node(nil), err return node(nil), err
} }
out.value = append(out.value.([]erlangNode), item) out.value = append(out.value.([]erlangNode), item)
@ -225,6 +251,9 @@ func parseErlangList(data []byte, i *int) (erlangNode, error) {
case ',': case ',':
*i++ *i++
continue continue
case '%':
// Starts a new comment node
continue
case ']', '}': case ']', '}':
*i++ *i++
return out, nil return out, nil
@ -234,3 +263,19 @@ func parseErlangList(data []byte, i *int) (erlangNode, error) {
} }
return out, nil return out, nil
} }
// parseErlangComment advances *i past a single comment, i.e. past the rest of
// the current line including its terminator.
//
// It consumes bytes of data starting at *i until it has consumed an LF ("\n")
// or a CR/LF pair ("\r\n"), or until the end of data is reached. A CR that is
// not immediately followed by LF does not end the comment and is consumed like
// any other byte. On return, *i is the index of the first byte after the
// comment (len(data) if the comment runs to the end of the input).
func parseErlangComment(data []byte, i *int) {
	for *i < len(data) {
		c := data[*i]
		*i++

		switch {
		case c == '\n':
			return
		case c == '\r' && *i < len(data) && data[*i] == '\n':
			// The bounds check matters: a comment ending in a bare CR at the
			// very end of the input must not index past the slice.
			*i++
			return
		}
	}
}

View File

@ -38,6 +38,21 @@ func Test_parseErlang(t *testing.T) {
{<<"bcrypt">>, <<"3418821BC17CE6E96A4A77D1A88D7485BF783E212069FACFC79510AFBFF95352">>}, {<<"bcrypt">>, <<"3418821BC17CE6E96A4A77D1A88D7485BF783E212069FACFC79510AFBFF95352">>},
{<<"unicode_util_compat">>, <<"25EEE6D67DF61960CF6A794239566599B09E17E668D3700247BC498638152521">>}]} {<<"unicode_util_compat">>, <<"25EEE6D67DF61960CF6A794239566599B09E17E668D3700247BC498638152521">>}]}
].`, ].`,
},
{
name: "empty list",
content: `
{test, [
{with_space, [ ]},
{without_space, []}
]}`,
},
{
name: "valid strings",
content: `
{strings, [
"foo", 'bar'
]}`,
}, },
{ {
name: "invalid string content", name: "invalid string content",
@ -46,6 +61,14 @@ func Test_parseErlang(t *testing.T) {
{"1.2.0 {"1.2.0
">>}, ">>},
].`, ].`,
},
{
name: "string mismach",
wantErr: require.Error,
content: `
{bad_string, [
'foo"
]}`,
}, },
{ {
name: "invalid content", name: "invalid content",
@ -54,6 +77,25 @@ func Test_parseErlang(t *testing.T) {
{"1.2.0"}. {"1.2.0"}.
].`, ].`,
}, },
{
name: "valid comments",
content: `
{ comments, [
{ foo, bar },
%% this is a comment
% this is also a comment
{ hello, 'bar' }, %%inline comment
{ baz }
]}`,
},
{
name: "starts with a comments",
content: `
%% starts with comment
{ comments, [
{ foo, bar }
]}`,
},
} }
for _, test := range tests { for _, test := range tests {