Skip to content
Snippets Groups Projects
Unverified Commit b2b9a856 authored by Neil Pankey's avatar Neil Pankey Committed by GitHub
Browse files

Merge pull request #14 from neilpa/unicode-encodings

UTF-16 support for JSON and option to skip BOMs
parents 50710192 87a159c4
No related branches found
No related tags found
No related merge requests found
Showing
with 258 additions and 44 deletions
// +build ignore
// gen_testdata clones the utf-8 test data to the other
// unicode encodings and adds BOM variants of each.
package main
import (
"io/ioutil"
"log"
"os"
"path/filepath"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/unicode"
)
// main regenerates the derived test fixtures. For every file found in
// testdata/utf-8 it writes:
//   - a UTF-8 copy prefixed with a BOM into testdata/utf-8_bom
//   - UTF-16BE and UTF-16LE copies into testdata/utf-16{be,le}
//   - the same UTF-16 copies prefixed with a BOM into testdata/utf-16{be,le}_bom
//
// Any failure aborts the program via log.Fatal.
func main() {
	// The BOM bytes are passed to write separately so that the same encoded
	// buffer can be emitted both with and without a BOM.
	xforms := []struct {
		dir, bom string
		enc      encoding.Encoding
	}{
		{"testdata/utf-16be", "\xFE\xFF", unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)},
		{"testdata/utf-16le", "\xFF\xFE", unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)},
	}

	paths, err := filepath.Glob("testdata/utf-8/*")
	if err != nil {
		log.Fatal(err)
	}
	// Running from the wrong directory would otherwise "succeed" while
	// generating nothing at all.
	if len(paths) == 0 {
		log.Fatal("no source files matched testdata/utf-8/*")
	}

	for _, p := range paths {
		src, err := ioutil.ReadFile(p)
		if err != nil {
			log.Fatal(err)
		}
		write("testdata/utf-8_bom", p, "\xEF\xBB\xBF", src)

		for _, xform := range xforms {
			dst, err := xform.enc.NewEncoder().Bytes(src)
			if err != nil {
				log.Fatal(err)
			}
			write(xform.dir, p, "", dst)
			write(xform.dir+"_bom", p, xform.bom, dst)
		}
	}
}
// write creates the file dir/<base name of orig> and fills it with bom
// followed by buf. The destination directory is created when it does not
// exist yet. Any failure aborts the program via log.Fatal.
func write(dir, orig, bom string, buf []byte) {
	if err := os.MkdirAll(dir, 0755); err != nil {
		log.Fatal(err)
	}
	f, err := os.Create(filepath.Join(dir, filepath.Base(orig)))
	if err != nil {
		log.Fatal(err)
	}
	if _, err = f.Write([]byte(bom)); err != nil {
		log.Fatal(err)
	}
	if _, err = f.Write(buf); err != nil {
		log.Fatal(err)
	}
	// Close explicitly rather than with defer: log.Fatal exits without
	// running deferred calls, and a failed Close can mean lost data.
	if err = f.Close(); err != nil {
		log.Fatal(err)
	}
}
......@@ -2,8 +2,11 @@
// a provided JSON Schema - https://json-schema.org/
package main
//go:generate go run gen_testdata.go
import (
"bufio"
"bytes"
"flag"
"fmt"
"io"
......@@ -15,21 +18,37 @@ import (
"strings"
"sync"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/unicode"
"github.com/ghodss/yaml"
"github.com/mitchellh/go-homedir"
"github.com/xeipuuv/gojsonschema"
)
var (
version = "v1.3.0-dev"
version = "v1.4.0-dev"
schemaFlag = flag.String("s", "", "primary JSON schema to validate against, required")
quietFlag = flag.Bool("q", false, "quiet, only print validation failures and errors")
versionFlag = flag.Bool("v", false, "print version and exit")
bomFlag = flag.Bool("b", false, "allow BOM in JSON files, error if seen and unset")
listFlags stringFlags
refFlags stringFlags
)
// Byte order marks recognized at the very start of a JSON document.
// https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
const (
// bomUTF8 is the three-byte UTF-8 BOM.
bomUTF8 = "\xEF\xBB\xBF"
// bomUTF16BE is the two-byte big-endian UTF-16 BOM.
bomUTF16BE = "\xFE\xFF"
// bomUTF16LE is the two-byte little-endian UTF-16 BOM.
bomUTF16LE = "\xFF\xFE"
)
// UTF-16 encodings used by jsonDecodeCharset to decode JSON text. Both are
// built with unicode.IgnoreBOM: jsonDecodeCharset detects and strips a
// leading BOM itself before handing the buffer to the decoder.
var (
encUTF16BE = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
encUTF16LE = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
)
func init() {
flag.Var(&listFlags, "l", "validate JSON documents from newline separated paths and/or globs in a text file (relative to the basename of the file itself)")
flag.Var(&refFlags, "r", "referenced schema(s), can be globs and/or used multiple times")
......@@ -60,7 +79,7 @@ func realMain(args []string, w io.Writer) int {
dir := filepath.Dir(list)
f, err := os.Open(list)
if err != nil {
log.Fatalf("%s: %s\n", list, err)
return schemaError("%s: %s", list, err)
}
defer f.Close()
......@@ -74,7 +93,7 @@ func realMain(args []string, w io.Writer) int {
docs = append(docs, glob(pattern)...)
}
if err := scanner.Err(); err != nil {
log.Fatalf("%s: invalid file list: %s\n", list, err)
return schemaError("%s: invalid file list: %s", list, err)
}
}
if len(docs) == 0 {
......@@ -85,13 +104,13 @@ func realMain(args []string, w io.Writer) int {
sl := gojsonschema.NewSchemaLoader()
schemaPath, err := filepath.Abs(*schemaFlag)
if err != nil {
log.Fatalf("%s: unable to convert to absolute path: %s\n", *schemaFlag, err)
return schemaError("%s: unable to convert to absolute path: %s", *schemaFlag, err)
}
for _, ref := range refFlags {
for _, p := range glob(ref) {
absPath, err := filepath.Abs(p)
if err != nil {
log.Fatalf("%s: unable to convert to absolute path: %s\n", absPath, err)
return schemaError("%s: unable to convert to absolute path: %s", absPath, err)
}
if absPath == schemaPath {
......@@ -100,22 +119,22 @@ func realMain(args []string, w io.Writer) int {
loader, err := jsonLoader(absPath)
if err != nil {
log.Fatalf("%s: unable to load schema ref: %s\n", *schemaFlag, err)
return schemaError("%s: unable to load schema ref: %s", *schemaFlag, err)
}
if err := sl.AddSchemas(loader); err != nil {
log.Fatalf("%s: invalid schema: %s\n", p, err)
return schemaError("%s: invalid schema: %s", p, err)
}
}
}
schemaLoader, err := jsonLoader(schemaPath)
if err != nil {
log.Fatalf("%s: unable to load schema: %s\n", *schemaFlag, err)
return schemaError("%s: unable to load schema: %s", *schemaFlag, err)
}
schema, err := sl.Compile(schemaLoader)
if err != nil {
log.Fatalf("%s: invalid schema: %s\n", *schemaFlag, err)
return schemaError("%s: invalid schema: %s", *schemaFlag, err)
}
// Validate the schema against each doc in parallel, limiting simultaneous
......@@ -131,7 +150,6 @@ func realMain(args []string, w io.Writer) int {
sem <- 0
defer func() { <-sem }()
loader, err := jsonLoader(path)
if err != nil {
msg := fmt.Sprintf("%s: error: load doc: %s", path, err)
......@@ -190,19 +208,62 @@ func jsonLoader(path string) (gojsonschema.JSONLoader, error) {
}
switch filepath.Ext(path) {
case ".yml", ".yaml":
// TODO YAML requires the presence of a BOM to detect UTF-16
// text. Is there a decent heuristic to detect UTF-16 text
// missing a BOM so we can provide a better error message?
buf, err = yaml.YAMLToJSON(buf)
default:
buf, err = jsonDecodeCharset(buf)
}
if err != nil {
return nil, err
}
// TODO What if we have an empty document?
return gojsonschema.NewBytesLoader(buf), nil
}
// jsonDecodeCharset sniffs the encoding of a JSON buffer and decodes
// UTF-16 (big or little endian) input; anything else is returned as is.
//
// Detection order:
//  1. A leading UTF-8, UTF-16BE or UTF-16LE BOM.
//  2. Without a BOM, a zero first byte selects UTF-16BE and a zero second
//     byte selects UTF-16LE.
//
// A detected BOM is stripped when the `-b` flag is set and is reported as
// an error otherwise. Buffers shorter than two bytes are returned
// untouched.
func jsonDecodeCharset(buf []byte) ([]byte, error) {
	if len(buf) < 2 {
		// Too short to hold a BOM or a UTF-16 code unit: treat as UTF-8.
		return buf, nil
	}

	// Known BOMs paired with the decoder they imply (none for UTF-8).
	known := []struct {
		mark string
		enc  encoding.Encoding
	}{
		{bomUTF8, nil},
		{bomUTF16BE, encUTF16BE},
		{bomUTF16LE, encUTF16LE},
	}

	var (
		mark    string
		charset encoding.Encoding
	)
	for _, k := range known {
		if bytes.HasPrefix(buf, []byte(k.mark)) {
			mark, charset = k.mark, k.enc
			break
		}
	}

	if mark == "" {
		// No BOM: fall back to the position of the first zero byte.
		switch {
		case buf[0] == 0:
			charset = encUTF16BE
		case buf[1] == 0:
			charset = encUTF16LE
		}
	} else {
		if !*bomFlag {
			return nil, fmt.Errorf("unexpected BOM, see `-b` flag")
		}
		buf = buf[len(mark):]
	}

	if charset == nil {
		return buf, nil
	}
	return charset.NewDecoder().Bytes(buf)
}
func printUsage() {
fmt.Fprintf(os.Stderr, `Usage: %s -s schema.(json|yml) [options] document.(json|yml) ...
yajsv validates JSON and YAML document(s) against a schema. One of three statuses are
reported per document:
yajsv validates JSON and YAML document(s) against a schema. One of three status
results are reported per document:
pass: Document is valid relative to the schema
fail: Document is invalid relative to the schema
......@@ -212,7 +273,8 @@ func printUsage() {
schema validation failure.
Sets the exit code to 1 on any failures, 2 on any errors, 3 on both, 4 on
invalid usage. Otherwise, 0 is returned if everything passes validation.
invalid usage, 5 on schema definition or file-list errors. Otherwise, 0 is
returned if everything passes validation.
Options:
......@@ -227,6 +289,11 @@ func usageError(msg string) int {
return 4
}
// schemaError prints the formatted message, followed by a newline, to
// standard error and returns 5, the exit code used for schema definition
// and file-list errors.
func schemaError(format string, args ...interface{}) int {
	const exitCode = 5
	msg := fmt.Sprintf(format+"\n", args...)
	os.Stderr.WriteString(msg)
	return exitCode
}
// glob is a wrapper that also resolves `~` since we may be skipping
// the shell expansion when single-quoting globs at the command line
func glob(pattern string) []string {
......
package main
import (
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"testing"
)
// init silences diagnostics for the whole test binary by pointing
// os.Stderr at the null device.
func init() {
	// TODO: Cleanup this global monkey-patching
	//
	// The device is opened write-only: os.Open would return a read-only
	// handle, so every write to the replaced os.Stderr would fail instead
	// of being discarded.
	devnull, err := os.OpenFile(os.DevNull, os.O_WRONLY, 0)
	if err != nil {
		panic(err)
	}
	os.Stderr = devnull
}
func TestMain(t *testing.T) {
tests := []struct {
in string
......@@ -14,65 +25,69 @@ func TestMain(t *testing.T) {
exit int
}{
{
"-s testdata/schema.yml testdata/data-pass.yml",
[]string{"testdata/data-pass.yml: pass"},
"-s testdata/utf-16be_bom/schema.json testdata/utf-16le_bom/data-fail.yml",
[]string{},
5,
}, {
"-s testdata/utf-8/schema.yml testdata/utf-8/data-pass.yml",
[]string{"testdata/utf-8/data-pass.yml: pass"},
0,
}, {
"-s testdata/schema.json testdata/data-pass.yml",
[]string{"testdata/data-pass.yml: pass"},
"-s testdata/utf-8/schema.json testdata/utf-8/data-pass.yml",
[]string{"testdata/utf-8/data-pass.yml: pass"},
0,
}, {
"-s testdata/schema.json testdata/data-pass.json",
[]string{"testdata/data-pass.json: pass"},
"-s testdata/utf-8/schema.json testdata/utf-8/data-pass.json",
[]string{"testdata/utf-8/data-pass.json: pass"},
0,
}, {
"-s testdata/schema.yml testdata/data-pass.json",
[]string{"testdata/data-pass.json: pass"},
"-s testdata/utf-8/schema.yml testdata/utf-8/data-pass.json",
[]string{"testdata/utf-8/data-pass.json: pass"},
0,
}, {
"-q -s testdata/schema.yml testdata/data-fail.yml",
[]string{"testdata/data-fail.yml: fail: (root): foo is required"},
"-q -s testdata/utf-8/schema.yml testdata/utf-8/data-fail.yml",
[]string{"testdata/utf-8/data-fail.yml: fail: (root): foo is required"},
1,
}, {
"-q -s testdata/schema.json testdata/data-fail.yml",
[]string{"testdata/data-fail.yml: fail: (root): foo is required"},
"-q -s testdata/utf-8/schema.json testdata/utf-8/data-fail.yml",
[]string{"testdata/utf-8/data-fail.yml: fail: (root): foo is required"},
1,
}, {
"-q -s testdata/schema.json testdata/data-fail.json",
[]string{"testdata/data-fail.json: fail: (root): foo is required"},
"-q -s testdata/utf-8/schema.json testdata/utf-8/data-fail.json",
[]string{"testdata/utf-8/data-fail.json: fail: (root): foo is required"},
1,
}, {
"-q -s testdata/schema.yml testdata/data-fail.json",
[]string{"testdata/data-fail.json: fail: (root): foo is required"},
"-q -s testdata/utf-8/schema.yml testdata/utf-8/data-fail.json",
[]string{"testdata/utf-8/data-fail.json: fail: (root): foo is required"},
1,
}, {
"-q -s testdata/schema.json testdata/data-error.json",
[]string{"testdata/data-error.json: error: validate: invalid character 'o' in literal null (expecting 'u')"},
"-q -s testdata/utf-8/schema.json testdata/utf-8/data-error.json",
[]string{"testdata/utf-8/data-error.json: error: validate: invalid character 'o' in literal null (expecting 'u')"},
2,
}, {
"-q -s testdata/schema.yml testdata/data-error.yml",
[]string{"testdata/data-error.yml: error: load doc: yaml: found unexpected end of stream"},
"-q -s testdata/utf-8/schema.yml testdata/utf-8/data-error.yml",
[]string{"testdata/utf-8/data-error.yml: error: load doc: yaml: found unexpected end of stream"},
2,
}, {
"-q -s testdata/schema.json testdata/data-*.json",
"-q -s testdata/utf-8/schema.json testdata/utf-8/data-*.json",
[]string{
"testdata/data-fail.json: fail: (root): foo is required",
"testdata/data-error.json: error: validate: invalid character 'o' in literal null (expecting 'u')",
"testdata/utf-8/data-fail.json: fail: (root): foo is required",
"testdata/utf-8/data-error.json: error: validate: invalid character 'o' in literal null (expecting 'u')",
}, 3,
}, {
"-q -s testdata/schema.yml testdata/data-*.yml",
"-q -s testdata/utf-8/schema.yml testdata/utf-8/data-*.yml",
[]string{
"testdata/data-error.yml: error: load doc: yaml: found unexpected end of stream",
"testdata/data-fail.yml: fail: (root): foo is required",
"testdata/utf-8/data-error.yml: error: load doc: yaml: found unexpected end of stream",
"testdata/utf-8/data-fail.yml: fail: (root): foo is required",
}, 3,
},
}
for _, tt := range tests {
in := strings.ReplaceAll(tt.in, "/", string(filepath.Separator))
in := strings.Replace(tt.in, "/", string(filepath.Separator), -1)
sort.Strings(tt.out)
out := strings.Join(tt.out, "\n")
out = strings.ReplaceAll(out, "/", string(filepath.Separator))
out = strings.Replace(out, "/", string(filepath.Separator), -1)
t.Run(in, func(t *testing.T) {
var w strings.Builder
......@@ -90,3 +105,79 @@ func TestMain(t *testing.T) {
}
}
// TestMatrix runs realMain over every combination of schema encoding and
// format, document encoding, format and expected result, each with and
// without the -b flag, and checks the resulting exit code.
func TestMatrix(t *testing.T) {
	// schema.{format} {encoding}{_bom}/data-{expect}.{format}
	type testcase struct {
		schemaEnc, schemaFmt      string
		dataEnc, dataFmt, dataRes string
		allowBOM                  bool
	}

	encodings := []string{"utf-8", "utf-16be", "utf-16le", "utf-8_bom", "utf-16be_bom", "utf-16le_bom"}
	formats := []string{"json", "yml"}
	results := []string{"pass", "fail", "error"}
	allowBOMs := []bool{false, true}

	// poor man's cartesian product
	total := len(encodings) * len(formats) * len(encodings) * len(formats) * len(results) * len(allowBOMs)
	tests := make([]testcase, 0, total)
	for _, senc := range encodings {
		for _, sfmt := range formats {
			for _, denc := range encodings {
				for _, dfmt := range formats {
					for _, dres := range results {
						for _, allow := range allowBOMs {
							tests = append(tests, testcase{senc, sfmt, denc, dfmt, dres, allow})
						}
					}
				}
			}
		}
	}

	// The subtests overwrite the global -b flag; restore it afterwards so
	// the last case's value does not leak into other tests.
	savedBOM := *bomFlag
	defer func() { *bomFlag = savedBOM }()

	for _, tt := range tests {
		schemaBOM := strings.HasSuffix(tt.schemaEnc, "_bom")
		schema16 := strings.HasPrefix(tt.schemaEnc, "utf-16")
		dataBOM := strings.HasSuffix(tt.dataEnc, "_bom")
		data16 := strings.HasPrefix(tt.dataEnc, "utf-16")

		schema := fmt.Sprintf("testdata/%s/schema.%s", tt.schemaEnc, tt.schemaFmt)
		data := fmt.Sprintf("testdata/%s/data-%s.%s", tt.dataEnc, tt.dataRes, tt.dataFmt)
		cmd := fmt.Sprintf("-s %s %s", schema, data)
		if tt.allowBOM {
			cmd = "-b " + cmd
		}

		// Subtests run synchronously, so capturing tt and cmd is safe.
		t.Run(cmd, func(t *testing.T) {
			want := 0
			switch {
			// Schema Errors (exit = 5)
			//  - YAML w/out BOM for UTF-16
			//  - JSON w/ BOM but missing allowBOM flag
			case tt.schemaFmt == "yml" && !schemaBOM && schema16:
				want = 5
			case tt.schemaFmt == "json" && schemaBOM && !tt.allowBOM:
				want = 5
			// Data Errors (exit = 2)
			//  - YAML w/out BOM for UTF-16
			//  - JSON w/ BOM but missing allowBOM flag
			//  - standard malformed files (e.g. data-error)
			case tt.dataFmt == "yml" && !dataBOM && data16:
				want = 2
			case tt.dataFmt == "json" && dataBOM && !tt.allowBOM:
				want = 2
			case tt.dataRes == "error":
				want = 2
			// Data Failures
			case tt.dataRes == "fail":
				want = 1
			}

			// TODO: Cleanup this global monkey-patching
			*bomFlag = tt.allowBOM

			var w strings.Builder
			got := realMain(strings.Split(cmd, " "), &w)
			if got != want {
				t.Errorf("got(%d) != want(%d) bomflag %t", got, want, *bomFlag)
			}
		})
	}
}
File added
File added
File added
File added
File added
File added
File added
File added
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File suppressed by a .gitattributes entry or the file's encoding is unsupported.
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment