From 87a159c4587a194fa9386fb340980fec53bd61cf Mon Sep 17 00:00:00 2001
From: Neil Pankey <npankey@gmail.com>
Date: Tue, 6 Oct 2020 16:03:49 -0700
Subject: [PATCH] utf16: Schema errors and encoding tests

---
 gen_testdata.go | 85 ++++++++++++++++++++++-----------------------
 main.go         | 28 +++++++++------
 main_test.go    | 92 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 151 insertions(+), 54 deletions(-)

diff --git a/gen_testdata.go b/gen_testdata.go
index 062497b..fed4aaf 100644
--- a/gen_testdata.go
+++ b/gen_testdata.go
@@ -1,57 +1,56 @@
 // +build ignore
 
-// generates clones the utf-8 tests data to the other
+// gen_testdata clones the utf-8 test data to the other
 // unicode encodings and adds BOM variants of each.
 package main
 
 import (
-    "io/ioutil"
-    "log"
-    "os"
-    "path/filepath"
+	"io/ioutil"
+	"log"
+	"os"
+	"path/filepath"
 
-    "golang.org/x/text/encoding"
-    "golang.org/x/text/encoding/unicode"
+	"golang.org/x/text/encoding"
+	"golang.org/x/text/encoding/unicode"
 )
 
-
 func main() {
-    var xforms = []struct {
-        dir, bom string
-        enc encoding.Encoding
-    } {
-        { "testdata/utf-16be", "\xFE\xFF", unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) },
-        { "testdata/utf-16le", "\xFF\xFE", unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) },
-    }
-
-    paths, _ := filepath.Glob("testdata/utf-8/*")
-    for _, p := range paths {
-        src, err := ioutil.ReadFile(p)
-        if err != nil {
-            log.Fatal(err)
-        }
-
-        write("testdata/utf-8_bom", p, "\xEF\xBB\xBF", src)
-        for _, xform := range xforms {
-            dst, err := xform.enc.NewEncoder().Bytes(src)
-            if err != nil {
-                log.Fatal(err)
-            }
-            write(xform.dir, p, "", dst)
-            write(xform.dir + "_bom", p, xform.bom, dst)
-        }
-    }
+	var xforms = []struct { // output directory, BOM bytes and encoder for each UTF-16 variant
+		dir, bom string
+		enc      encoding.Encoding
+	}{
+		{"testdata/utf-16be", "\xFE\xFF", unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)},
+		{"testdata/utf-16le", "\xFF\xFE", unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)},
+	}
+
+	paths, _ := filepath.Glob("testdata/utf-8/*") // error ignored: Glob only reports ErrBadPattern and this pattern is a constant
+	for _, p := range paths {
+		src, err := ioutil.ReadFile(p)
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		write("testdata/utf-8_bom", p, "\xEF\xBB\xBF", src) // same bytes as the source, prefixed with the UTF-8 BOM
+		for _, xform := range xforms {
+			dst, err := xform.enc.NewEncoder().Bytes(src) // re-encode the source bytes with this UTF-16 variant
+			if err != nil {
+				log.Fatal(err)
+			}
+			write(xform.dir, p, "", dst)
+			write(xform.dir+"_bom", p, xform.bom, dst) // identical payload, prefixed with the variant's BOM
+		}
+	}
 }
 
 func write(dir, orig, bom string, buf []byte) {
-    f, err := os.Create(filepath.Join(dir, filepath.Base(orig)))
-    if err != nil {
-        log.Fatal(err)
-    }
-    if _, err = f.Write([]byte(bom)); err != nil {
-        log.Fatal(err)
-    }
-    if _, err = f.Write(buf); err != nil {
-        log.Fatal(err)
-    }
+	f, err := os.Create(filepath.Join(dir, filepath.Base(orig))) // output keeps orig's base name, placed under dir
+	if err != nil {
+		log.Fatal(err)
+	}
+	if _, err = f.Write([]byte(bom)); err != nil { // BOM bytes (may be empty) go first
+		log.Fatal(err)
+	}
+	if _, err = f.Write(buf); err != nil { // NOTE(review): f is never closed in this function
+		log.Fatal(err)
+	}
 }
diff --git a/main.go b/main.go
index 766b97e..bf33441 100644
--- a/main.go
+++ b/main.go
@@ -79,7 +79,7 @@ func realMain(args []string, w io.Writer) int {
 		dir := filepath.Dir(list)
 		f, err := os.Open(list)
 		if err != nil {
-			log.Fatalf("%s: %s\n", list, err)
+			return schemaError("%s: %s", list, err)
 		}
 		defer f.Close()
 
@@ -93,7 +93,7 @@ func realMain(args []string, w io.Writer) int {
 			docs = append(docs, glob(pattern)...)
 		}
 		if err := scanner.Err(); err != nil {
-			log.Fatalf("%s: invalid file list: %s\n", list, err)
+			return schemaError("%s: invalid file list: %s", list, err)
 		}
 	}
 	if len(docs) == 0 {
@@ -104,13 +104,13 @@ func realMain(args []string, w io.Writer) int {
 	sl := gojsonschema.NewSchemaLoader()
 	schemaPath, err := filepath.Abs(*schemaFlag)
 	if err != nil {
-		log.Fatalf("%s: unable to convert to absolute path: %s\n", *schemaFlag, err)
+		return schemaError("%s: unable to convert to absolute path: %s", *schemaFlag, err)
 	}
 	for _, ref := range refFlags {
 		for _, p := range glob(ref) {
 			absPath, err := filepath.Abs(p)
 			if err != nil {
-				log.Fatalf("%s: unable to convert to absolute path: %s\n", absPath, err)
+				return schemaError("%s: unable to convert to absolute path: %s", p, err) // report the input path; absPath is not meaningful when Abs fails
 			}
 
 			if absPath == schemaPath {
@@ -119,22 +119,22 @@ func realMain(args []string, w io.Writer) int {
 
 			loader, err := jsonLoader(absPath)
 			if err != nil {
-				log.Fatalf("%s: unable to load schema ref: %s\n", *schemaFlag, err)
+				return schemaError("%s: unable to load schema ref: %s", p, err) // name the ref that failed to load, not the main schema
 			}
 
 			if err := sl.AddSchemas(loader); err != nil {
-				log.Fatalf("%s: invalid schema: %s\n", p, err)
+				return schemaError("%s: invalid schema: %s", p, err)
 			}
 		}
 	}
 
 	schemaLoader, err := jsonLoader(schemaPath)
 	if err != nil {
-		log.Fatalf("%s: unable to load schema: %s\n", *schemaFlag, err)
+		return schemaError("%s: unable to load schema: %s", *schemaFlag, err)
 	}
 	schema, err := sl.Compile(schemaLoader)
 	if err != nil {
-		log.Fatalf("%s: invalid schema: %s\n", *schemaFlag, err)
+		return schemaError("%s: invalid schema: %s", *schemaFlag, err)
 	}
 
 	// Validate the schema against each doc in parallel, limiting simultaneous
@@ -262,8 +262,8 @@ func jsonDecodeCharset(buf []byte) ([]byte, error) {
 func printUsage() {
 	fmt.Fprintf(os.Stderr, `Usage: %s -s schema.(json|yml) [options] document.(json|yml) ...
 
-  yajsv validates JSON and YAML document(s) against a schema. One of three statuses are
-  reported per document:
+  yajsv validates JSON and YAML document(s) against a schema. One of three status
+  results are reported per document:
 
     pass: Document is valid relative to the schema
     fail: Document is invalid relative to the schema
@@ -273,7 +273,8 @@ func printUsage() {
   schema validation failure.
 
   Sets the exit code to 1 on any failures, 2 on any errors, 3 on both, 4 on
-  invalid usage. Otherwise, 0 is returned if everything passes validation.
+  invalid usage, 5 on schema definition or file-list errors. Otherwise, 0 is
+  returned if everything passes validation.
 
 Options:
 
@@ -288,6 +289,11 @@ func usageError(msg string) int {
 	return 4
 }
 
+func schemaError(format string, args ...interface{}) int { // prints the formatted message plus a newline to stderr
+	fmt.Fprintf(os.Stderr, format+"\n", args...)
+	return 5 // exit code the usage text assigns to schema definition or file-list errors
+}
+
 // glob is a wrapper that also resolves `~` since we may be skipping
 // the shell expansion when single-quoting globs at the command line
 func glob(pattern string) []string {
diff --git a/main_test.go b/main_test.go
index 878fe8b..42d0413 100644
--- a/main_test.go
+++ b/main_test.go
@@ -1,12 +1,23 @@
 package main
 
 import (
+	"fmt"
+	"os"
 	"path/filepath"
 	"sort"
 	"strings"
 	"testing"
 )
 
+func init() {
+	// TODO: Cleanup this global monkey-patching. Silences stderr for every test in the package.
+	devnull, err := os.OpenFile(os.DevNull, os.O_WRONLY, 0) // write-only: os.Open's read-only handle rejects every write
+	if err != nil {
+		panic(err)
+	}
+	os.Stderr = devnull
+}
+
 func TestMain(t *testing.T) {
 	tests := []struct {
 		in   string
@@ -14,6 +25,10 @@ func TestMain(t *testing.T) {
 		exit int
 	}{
 		{
+			"-s testdata/utf-16be_bom/schema.json testdata/utf-16le_bom/data-fail.yml",
+			[]string{},
+			5,
+		}, {
 			"-s testdata/utf-8/schema.yml testdata/utf-8/data-pass.yml",
 			[]string{"testdata/utf-8/data-pass.yml: pass"},
 			0,
@@ -89,3 +104,80 @@ func TestMain(t *testing.T) {
 		})
 	}
 }
+
+func TestMatrix(t *testing.T) {
+	// Runs realMain on testdata/{encoding}{_bom}/schema.{format} against testdata/{encoding}{_bom}/data-{result}.{format}, with and without -b.
+	type testcase struct {
+		schemaEnc, schemaFmt      string
+		dataEnc, dataFmt, dataRes string
+		allowBOM                  bool
+	}
+
+	encodings := []string{"utf-8", "utf-16be", "utf-16le", "utf-8_bom", "utf-16be_bom", "utf-16le_bom"}
+	formats := []string{"json", "yml"}
+	results := []string{"pass", "fail", "error"}
+	tests := []testcase{}
+
+	// poor man's cartesian product: every schema/data encoding, format and result, each with allowBOM off and on
+	for _, senc := range encodings {
+		for _, sfmt := range formats {
+			for _, denc := range encodings {
+				for _, dfmt := range formats {
+					for _, dres := range results {
+						tests = append(tests, testcase{senc, sfmt, denc, dfmt, dres, false})
+						tests = append(tests, testcase{senc, sfmt, denc, dfmt, dres, true})
+					}
+				}
+			}
+		}
+	}
+
+	for _, tt := range tests {
+		schemaBOM := strings.HasSuffix(tt.schemaEnc, "_bom")
+		schema16 := strings.HasPrefix(tt.schemaEnc, "utf-16")
+		dataBOM := strings.HasSuffix(tt.dataEnc, "_bom")
+		data16 := strings.HasPrefix(tt.dataEnc, "utf-16")
+
+		schema := fmt.Sprintf("testdata/%s/schema.%s", tt.schemaEnc, tt.schemaFmt)
+		data := fmt.Sprintf("testdata/%s/data-%s.%s", tt.dataEnc, tt.dataRes, tt.dataFmt)
+		cmd := fmt.Sprintf("-s %s %s", schema, data)
+		if tt.allowBOM {
+			cmd = "-b " + cmd
+		}
+
+		t.Run(cmd, func(t *testing.T) { // no t.Parallel here, so the subtest finishes before tt advances
+			want := 0
+			switch {
+			// Schema Errors (exit = 5); listed first because the first matching case wins
+			// - YAML w/out BOM for UTF-16
+			// - JSON w/ BOM but missing allowBOM flag
+			case tt.schemaFmt == "yml" && !schemaBOM && schema16:
+				want = 5
+			case tt.schemaFmt == "json" && schemaBOM && !tt.allowBOM:
+				want = 5
+			// Data Errors (exit = 2)
+			// - YAML w/out BOM for UTF-16
+			// - JSON w/ BOM but missing allowBOM flag
+			// - standard malformed files (e.g. data-error)
+			case tt.dataFmt == "yml" && !dataBOM && data16:
+				want = 2
+			case tt.dataFmt == "json" && dataBOM && !tt.allowBOM:
+				want = 2
+			case tt.dataRes == "error":
+				want = 2
+			// Data Failures (exit = 1)
+			case tt.dataRes == "fail":
+				want = 1
+			}
+
+			// TODO: Cleanup this global monkey-patching; bomFlag is set directly even though cmd may already carry -b
+			*bomFlag = tt.allowBOM
+
+			var w strings.Builder
+			got := realMain(strings.Split(cmd, " "), &w) // the generated paths contain no spaces, so a plain split yields the argv
+			if got != want {
+				t.Errorf("got(%d) != want(%d) bomflag %t", got, want, *bomFlag)
+			}
+		})
+	}
+}
-- 
GitLab