#include #include "inout.hh" // for to_json() and from_json() #include "nfc.hh" // for illegal_utf8 exception #include "json_spirit/json_spirit_writer.h" #include "json_spirit/json_spirit_reader.h" #include namespace js = json_spirit; namespace { // for JSON input struct TestPair { std::string input; std::string output; }; // for JSON output struct TestTriple { std::string input; std::string output_esc; // with \uXXXX escapes std::string output_raw; // raw_utf8 }; std::ostream& operator<<(std::ostream& o, const TestPair& tp) { return o << "input=«" << tp.input << "», output=«" << tp.output << "». "; } std::ostream& operator<<(std::ostream& o, const TestTriple& tt) { return o << "input=«" << tt.input << "», esc=«" << tt.output_esc << "», raw=«" << tt.output_raw << "». "; } const char nullo[4] = {0,0,0,0}; const char null_x[4] = { '\0', 'X', '\0', '\n' }; const std::vector testValuesInput = { { R"("")" , "" }, // always start with the simple case ;-) { R"("123")" , "123" }, // some ASCII digits. Still easy. { R"("\n\\\b")" , "\n\\\b" }, // backslash escapes for ASCII and control chars { R"("\u001F")" , "\x1f" }, // C compiler knows \x##, but JSON does not { R"("\u007F")" , "\x7f" }, // C compiler knows \x##, but JSON does not { R"("\u00E4\u00F6\u00FC")" , "äöü" }, // German umlauts from Unicode block "Latin-1 Supplement" { R"("äöü")" , "äöü" }, // German umlauts from Unicode block "Latin-1 Supplement" { R"("\u041C\u043E\u0441\u043A\u0432\u0430")" , "Москва" }, // some Cyrillic { R"("Москва")" , "Москва" }, // some Cyrillic { R"("\uD83D\uDCA3")" , "\xF0\x9f\x92\xA3" }, // Unicode Bomb , an example for char outside of BMP { "\"\xF0\x9f\x92\xA3\"", "\xF0\x9f\x92\xA3" }, // Unicode Bomb , an example for char outside of BMP { R"("\u0000")" , std::string(nullo, nullo+1) }, // Yeah, 1 NUL byte { R"("\u0000\u0000")" , std::string(nullo, nullo+2) }, // Yeah, 2 NUL bytes { R"("\u0000X\u0000\n")", std::string(null_x, null_x+4) }, // guess what... { "\"EOF\"", "EOF" } }; const std::vector testValuesOutput = { { "" , R"("")" , R"("")" }, // always start with the simple case ;-) { "123" , R"("123")" , R"("123")" }, // some ASCII digits. Still easy. { "\n\\\b", R"("\n\\\b")" , R"("\n\\\b")" }, // backslash escapes for ASCII and control chars { "\x1f" , R"("\u001F")" , R"("\u001F")" }, // C compiler knows \x##, but JSON does not { "\x7f" , R"("\u007F")" , R"("\u007F")" }, // C compiler knows \x##, but JSON does not { "äöü" , R"("\u00E4\u00F6\u00FC")" , R"("äöü")" }, // German umlauts from Unicode block "Latin-1 Supplement" { "Москва", R"("\u041C\u043E\u0441\u043A\u0432\u0430")" , R"("Москва")" }, // some Cyrillic { "\xf0\x9f\x92\xa3", R"("\uD83D\uDCA3")" , "\"\xF0\x9f\x92\xA3\"" }, // Unicode Bomb , an example for char outside of BMP { std::string(nullo, nullo+1), R"("\u0000")" , R"("\u0000")" }, // Yeah, 1 NUL byte { std::string(nullo, nullo+2), R"("\u0000\u0000")" , R"("\u0000\u0000")" }, // Yeah, 2 NUL bytes { std::string(null_x, null_x+4), R"("\u0000X\u0000\n")" , R"("\u0000X\u0000\n")" }, // guess what... // a nasty and controversal example: // (LINE SEPARATOR) and (PARAGRAPH SEPARATOR) are legal in JSON but not in JavaScript. // A JSON encoder should _always_ escape them to avoid trouble in JavaScript: // See http://timelessrepo.com/json-isnt-a-javascript-subset // // { "\xe2\x80\xa8 \xe2\x80\xa9", R"("\u2028 \u2029")", R"("\u2028 \u2029")" }, { "EOF", "\"EOF\"", "\"EOF\"" } }; } // end of anonymous namespace class FromJsonTest : public ::testing::TestWithParam { // intentionally left blank for now. }; INSTANTIATE_TEST_CASE_P(FromJsonTestInstance, FromJsonTest, testing::ValuesIn(testValuesInput) ); class ToJsonTest : public ::testing::TestWithParam { // intentionally left blank for now. }; INSTANTIATE_TEST_CASE_P(ToJsonTestInstance, ToJsonTest, testing::ValuesIn(testValuesOutput) ); TEST_P( FromJsonTest, Meh ) { const auto param = GetParam(); js::Value v; js::read_or_throw( param.input, v); EXPECT_EQ( param.output, from_json(v) ); } TEST_P( ToJsonTest, Meh ) { const auto v = GetParam(); EXPECT_EQ( v.output_esc, simple_write( to_json( v.input )) ); EXPECT_EQ( v.output_raw, js::write( to_json( v.input ), js::raw_utf8) ); } TEST( ToJsonTest, IllegalUtf8 ) { // examples from UTF-8 stress test: EXPECT_THROW( to_json( "\x80" ), illegal_utf8 ); EXPECT_THROW( to_json( "\xbf" ), illegal_utf8 ); EXPECT_THROW( to_json( "\xc0" ), illegal_utf8 ); EXPECT_THROW( to_json( "\xc1" ), illegal_utf8 ); EXPECT_THROW( to_json( "\xc2" ), illegal_utf8 ); EXPECT_THROW( to_json( "\xfe" ), illegal_utf8 ); EXPECT_THROW( to_json( "\xff" ), illegal_utf8 ); EXPECT_THROW( to_json( "\xC0\xAF" ), illegal_utf8 ); // overlong "/" EXPECT_THROW( to_json( "\xE0\x80\xAF" ), illegal_utf8 ); // overlong "/" EXPECT_THROW( to_json( "\xF0\x80\x80\xAF" ), illegal_utf8 ); // overlong "/" EXPECT_THROW( to_json( "\xF4\x90\x80\x80" ), illegal_utf8 ); // bigger than U+10FFFF EXPECT_THROW( to_json( "\xED\xA0\x81\xED\xB0\x90" ), illegal_utf8 ); // CESU-8. Correct UTF-8 whoild be F0 90 90 80. } TEST( FromJsonTest, IllegalSequences ) { js::Value v; // too short \u escape sequences EXPECT_ANY_THROW( js::read_or_throw( R"("\")", v) ); EXPECT_THROW( js::read_or_throw( R"("\q")", v), std::runtime_error ); EXPECT_THROW( js::read_or_throw( R"("\u")", v), std::runtime_error ); EXPECT_THROW( js::read_or_throw( R"("\u1")", v), std::runtime_error ); EXPECT_THROW( js::read_or_throw( R"("\u12")", v), std::runtime_error ); EXPECT_THROW( js::read_or_throw( R"("\u123")", v), std::runtime_error ); // high surrogate without following legal low surrogate: EXPECT_THROW( js::read_or_throw( R"("\uD801")", v), std::runtime_error ); EXPECT_THROW( js::read_or_throw( R"("\uD801D")", v), std::runtime_error ); EXPECT_ANY_THROW( js::read_or_throw( R"("\uD801\")", v) ); EXPECT_THROW( js::read_or_throw( R"("\uD801\u")", v), std::runtime_error ); EXPECT_THROW( js::read_or_throw( R"("\uD801\uD")", v), std::runtime_error ); EXPECT_THROW( js::read_or_throw( R"("\uD801\uDC")", v), std::runtime_error ); EXPECT_THROW( js::read_or_throw( R"("\uD801\uDC0")", v), std::runtime_error ); EXPECT_THROW( js::read_or_throw( R"("\uD801\u1234")", v), std::runtime_error ); EXPECT_NO_THROW( js::read_or_throw( R"("\uD801\uDC02")", v) ); // legal UTF-16 sequence EXPECT_THROW( js::read_or_throw( R"("\uD801\uD801")", v), std::runtime_error ); // two high surrogates EXPECT_THROW( js::read_or_throw( R"("\uDC01\uDC01")", v), std::runtime_error ); // low surrogate without high surrogate before }