Commit a2ab44de authored by Dridi Boukelmoune's avatar Dridi Boukelmoune

vav: Tighten arguments parsing

Commas unlike spaces are hard separators, so a trailing comma leads to a
last empty parameter.

A comma may appear after an argument's "trailing" spaces and should not
result in an additional parameter. In <foo  , bar> we should expect two
fields, not three.

Comments are only treated as such at arguments boundaries: <foo #bar>
parses one field <foo> and <foo#bar> parses one field <foo#bar>, taking
the shell word splitting as the model, cementing what was the existing
VAV behavior in the first place.

Unlike the shell, we don't expect quotes to start in the middle of a
token so <foo"bar> is invalid unless escaping was disabled.

Fields that are quoted need a separator: <"foo""bar"> is therefore
invalid unless escaping was disabled.
parent 4f1fd412
......@@ -156,19 +156,23 @@ VAV_BackSlashDecode(const char *s, const char *e)
}
static char err_invalid_backslash[] = "Invalid backslash sequence";
static char err_invalid_quote[] = "Invalid '\"'";
static char err_missing_quote[] = "Missing '\"'";
static char err_missing_separator[] = "Missing separator between arguments";
char **
VAV_ParseTxt(const char *b, const char *e, int *argc, int flag)
{
char **argv;
const char *p;
const char *p, *sep;
int nargv, largv;
int i, quote;
AN(b);
if (e == NULL)
e = strchr(b, '\0');
sep = NULL;
quote = 0;
nargv = 1;
largv = 16;
argv = calloc(largv, sizeof *argv);
......@@ -180,6 +184,17 @@ VAV_ParseTxt(const char *b, const char *e, int *argc, int flag)
b++;
continue;
}
if (sep != NULL && isspace(*sep) &&
*b == ',' && (flag & ARGV_COMMA)) {
sep = NULL;
b++;
continue;
}
if (sep != NULL && *sep == '"' && *b == '"') {
argv[0] = err_missing_separator;
return (argv);
}
sep = NULL;
if ((flag & ARGV_COMMENT) && *b == '#')
break;
if (*b == '"' && !(flag & ARGV_NOESC)) {
......@@ -200,22 +215,34 @@ VAV_ParseTxt(const char *b, const char *e, int *argc, int flag)
continue;
}
if (!quote) {
if (isspace(*b))
if (isspace(*b)) {
sep = b;
break;
if ((flag & ARGV_COMMA) && *b == ',')
}
if ((flag & ARGV_COMMA) && *b == ',') {
sep = b;
break;
}
if (!(flag & ARGV_NOESC) && *b == '"') {
argv[0] = err_invalid_quote;
return (argv);
}
b++;
continue;
}
if (*b == '"' && !(flag & ARGV_NOESC))
if (*b == '"' && !(flag & ARGV_NOESC)) {
sep = b;
quote = 0;
break;
}
b++;
}
if (b >= e && quote) {
if (sep == NULL && quote) {
argv[0] = err_missing_quote;
return (argv);
}
if (nargv + 1 >= largv) {
/* Ensure slots for 1 new arg plus 1 trailing arg */
if (nargv + 2 >= largv) {
argv = realloc(argv, sizeof (*argv) * (largv += largv));
assert(argv != NULL);
}
......@@ -232,6 +259,11 @@ VAV_ParseTxt(const char *b, const char *e, int *argc, int flag)
if (b < e)
b++;
}
if (sep != NULL && *sep == ',') {
argv[nargv] = strdup("");
assert(argv[nargv] != NULL);
nargv++;
}
argv[nargv] = NULL;
if (argc != NULL)
*argc = nargv;
......@@ -349,9 +381,14 @@ static const struct test_case *tests[] = {
TEST_PASS( C , "foo bar", "foo", "bar"),
TEST_PASS( C , "foo,bar", "foo", "bar"),
TEST_PASS(0 , " foo bar ", "foo", "bar"),
TEST_PASS( C , " foo , bar ", "foo", "bar"),
TEST_PASS( C , "foo bar ", "foo", "bar"),
TEST_PASS( C , "foo,bar,", "foo", "bar", ""),
TEST_PASS(0 , "foo \"bar baz\"", "foo", "bar baz"),
TEST_PASS(0 , "foo #bar", "foo", "#bar"),
TEST_PASS(K , "foo #bar", "foo"),
TEST_PASS(0 , "foo#bar", "foo#bar"),
TEST_PASS(K , "foo#bar", "foo#bar"),
TEST_PASS( N, "\\", "\\"),
TEST_FAIL(0 , "\\", invalid_backslash),
TEST_FAIL(0 , "\\x", invalid_backslash),
......@@ -359,6 +396,11 @@ static const struct test_case *tests[] = {
TEST_FAIL(0 , "\\x2O", invalid_backslash),
TEST_PASS(0 , "\\x20", " "),
TEST_FAIL(0 , "\"foo", missing_quote),
TEST_PASS( N, "foo\"bar", "foo\"bar"),
TEST_FAIL(0 , "foo\"bar", invalid_quote),
TEST_FAIL(0 , "foo\"bar", invalid_quote),
TEST_PASS( N, "\"foo\"\"bar\"", "\"foo\"\"bar\""),
TEST_FAIL(0 , "\"foo\"\"bar\"", missing_separator),
NULL
#undef N
#undef C
......@@ -397,6 +439,9 @@ test_run(const struct test_case *tc, int *ret)
return (argv);
}
if (tc->argv[0] != NULL)
return (argv);
for (i = 1; i < argc && tc->argv[i] != NULL && argv[i] != NULL; i++) {
if (!strcmp(tc->argv[i], argv[i]))
continue;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment