From 6c1908a80572031cdb4733b3c151ceaf31ae3179 Mon Sep 17 00:00:00 2001 From: onozaty Date: Sat, 20 May 2023 18:51:17 +0900 Subject: [PATCH] Change to allow concat to merge 3 or more CSV files --- README.md | 11 ++--- cmd/common_test.go | 3 +- cmd/concat.go | 115 +++++++++++++++++++++++---------------------- cmd/concat_test.go | 61 ++++++++++++++---------- 4 files changed, 101 insertions(+), 89 deletions(-) diff --git a/README.md b/README.md index 8b4b581..3a66e18 100644 --- a/README.md +++ b/README.md @@ -238,7 +238,7 @@ Check the column names and concat them into the same column. ### Usage ``` -csvt concat -1 INPUT1 -2 INPUT2 -o OUTPUT +csvt concat -i INPUT1 -i INPUT2 [-i INPUT3 ...] -o OUTPUT ``` ``` @@ -246,10 +246,9 @@ Usage: csvt concat [flags] Flags: - -1, --first string First CSV file path. - -2, --second string Second CSV file path. - -o, --output string Output CSV file path. - -h, --help help for concat + -i, --input stringArray Input CSV files path. + -o, --output string Output CSV file path. + -h, --help help for concat ``` ### Example @@ -273,7 +272,7 @@ name4,4 Concat `input1.csv` and `input2.csv`. ``` -$ csvt concat -1 input1.csv -2 input2.csv -o output.csv +$ csvt concat -i input1.csv -i input2.csv -o output.csv ``` The contents of the created `output.csv`. 
diff --git a/cmd/common_test.go b/cmd/common_test.go index cdc83b8..8eddb86 100644 --- a/cmd/common_test.go +++ b/cmd/common_test.go @@ -1,7 +1,6 @@ package cmd import ( - "io/ioutil" "os" "path/filepath" "strings" @@ -60,7 +59,7 @@ func joinRows(rows ...string) string { func readDir(t *testing.T, dir string) map[string][]byte { - files, err := ioutil.ReadDir(dir) + files, err := os.ReadDir(dir) if err != nil { t.Fatal("read dir failed\n", err) } diff --git a/cmd/concat.go b/cmd/concat.go index dcb265a..fe00142 100644 --- a/cmd/concat.go +++ b/cmd/concat.go @@ -21,40 +21,37 @@ func newConcatCmd() *cobra.Command { return err } - firstPath, _ := cmd.Flags().GetString("first") - secondPath, _ := cmd.Flags().GetString("second") + inputPaths, _ := cmd.Flags().GetStringArray("input") outputPath, _ := cmd.Flags().GetString("output") // 引数の解析に成功した時点で、エラーが起きてもUsageは表示しない cmd.SilenceUsage = true - return runConcat(format, firstPath, secondPath, outputPath) + return runConcat(format, inputPaths, outputPath) }, } - concatCmd.Flags().StringP("first", "1", "", "First CSV file path.") - concatCmd.MarkFlagRequired("first") - concatCmd.Flags().StringP("second", "2", "", "Second CSV file path.") - concatCmd.MarkFlagRequired("second") + concatCmd.Flags().StringArrayP("input", "i", []string{}, "Input CSV files path.") + concatCmd.MarkFlagRequired("input") concatCmd.Flags().StringP("output", "o", "", "Output CSV file path.") concatCmd.MarkFlagRequired("output") return concatCmd } -func runConcat(format csv.Format, firstPath string, secondPath string, outputPath string) error { +func runConcat(format csv.Format, inputPaths []string, outputPath string) error { - firstReader, firstClose, err := setupInput(firstPath, format) - if err != nil { - return err - } - defer firstClose() + readers := []csv.CsvReader{} - secondReader, secondClose, err := setupInput(secondPath, format) - if err != nil { - return err + for _, inputPath := range inputPaths { + reader, inputClose, err := 
setupInput(inputPath, format) + if err != nil { + return err + } + defer inputClose() + + readers = append(readers, reader) } - defer secondClose() writer, outputClose, err := setupOutput(outputPath, format) if err != nil { @@ -62,7 +59,7 @@ func runConcat(format csv.Format, firstPath string, secondPath string, outputPat } defer outputClose() - err = concat(firstReader, secondReader, writer) + err = concat(readers, writer) if err != nil { return err } @@ -70,34 +67,14 @@ func runConcat(format csv.Format, firstPath string, secondPath string, outputPat return writer.Flush() } -func concat(first csv.CsvReader, second csv.CsvReader, writer csv.CsvWriter) error { +func concat(readers []csv.CsvReader, writer csv.CsvWriter) error { - firstColumnNames, err := first.Read() + firstReader := readers[0] + firstColumnNames, err := firstReader.Read() if err != nil { return errors.Wrap(err, "failed to read the first CSV file") } - secondColumnNames, err := second.Read() - if err != nil { - return errors.Wrap(err, "failed to read the second CSV file") - } - - if len(firstColumnNames) != len(secondColumnNames) { - return fmt.Errorf("number of columns does not match") - } - - // 1つ目のCSVのカラム名と2つ目のCSVのカラム名のマッピングを作成 - secondColumnIndexes := []int{} - for _, firstColumnName := range firstColumnNames { - - secondColumnIndex, err := getTargetColumnIndex(secondColumnNames, firstColumnName) - if err != nil { - return errors.Wrap(err, "no column corresponding to the second CSV file") - } - - secondColumnIndexes = append(secondColumnIndexes, secondColumnIndex) - } - // 1つ目の書き込み err = writer.Write(firstColumnNames) if err != nil { @@ -105,7 +82,7 @@ func concat(first csv.CsvReader, second csv.CsvReader, writer csv.CsvWriter) err } for { - row, err := first.Read() + row, err := firstReader.Read() if err == io.EOF { break } @@ -119,25 +96,51 @@ func concat(first csv.CsvReader, second csv.CsvReader, writer csv.CsvWriter) err } } - // 2つ目の書き込み - for { - row, err := second.Read() - if err == io.EOF 
{ - break - } + // 2つ目以降 + count := 1 + for _, reader := range readers[1:] { + count++ + + columnNames, err := reader.Read() if err != nil { - return errors.Wrap(err, "failed to read the second CSV file") + return errors.Wrapf(err, "failed to read CSV file (%d)", count) } - // 1つ目のCSVに合わせてカラム入れ替え - swapedRow := []string{} - for _, secondColumnIndex := range secondColumnIndexes { - swapedRow = append(swapedRow, row[secondColumnIndex]) + if len(firstColumnNames) != len(columnNames) { + return fmt.Errorf("number of columns does not match (%d)", count) } - err = writer.Write(swapedRow) - if err != nil { - return err + // 1つ目のCSVのカラム名とのカラム名のマッピングを作成 + columnIndexes := []int{} + for _, firstColumnName := range firstColumnNames { + + columnIndex, err := getTargetColumnIndex(columnNames, firstColumnName) + if err != nil { + return errors.Wrapf(err, "no column corresponding in CSV file (%d)", count) + } + + columnIndexes = append(columnIndexes, columnIndex) + } + + for { + row, err := reader.Read() + if err == io.EOF { + break + } + if err != nil { + return errors.Wrapf(err, "failed to read CSV file (%d)", count) + } + + // 1つ目のCSVに合わせてカラム入れ替え + swapedRow := []string{} + for _, columnIndex := range columnIndexes { + swapedRow = append(swapedRow, row[columnIndex]) + } + + err = writer.Write(swapedRow) + if err != nil { + return err + } } } diff --git a/cmd/concat_test.go b/cmd/concat_test.go index 3ddf78f..28cf25a 100644 --- a/cmd/concat_test.go +++ b/cmd/concat_test.go @@ -28,8 +28,8 @@ func TestConcatCmd(t *testing.T) { rootCmd := newRootCmd() rootCmd.SetArgs([]string{ "concat", - "-1", fi1, - "-2", fi2, + "-i", fi1, + "-i", fi2, "-o", fo, }) @@ -70,14 +70,23 @@ b2,b3,4 fi2 := createTempFile(t, s2) defer os.Remove(fi2) + s3 := `col3,col1,col2 +-3,5,-2 ++3,6,+2 +` + + fi3 := createTempFile(t, s3) + defer os.Remove(fi3) + fo := createTempFile(t, "") defer os.Remove(fo) rootCmd := newRootCmd() rootCmd.SetArgs([]string{ "concat", - "-1", fi1, - "-2", fi2, + "-i", fi1, + "-i", 
fi2, + "-i", fi3, "-o", fo, }) @@ -94,6 +103,8 @@ b2,b3,4 "2,y2,y3", "3,a2,a3", "4,b2,b3", + "5,-2,-3", + "6,+2,+3", ) if result != expect { @@ -121,8 +132,8 @@ a b rootCmd := newRootCmd() rootCmd.SetArgs([]string{ "concat", - "-1", fi1, - "-2", fi2, + "-i", fi1, + "-i", fi2, "-o", fo, "--delim", `\t`, }) @@ -159,8 +170,8 @@ func TestConcatCmd_invalidFormat(t *testing.T) { rootCmd := newRootCmd() rootCmd.SetArgs([]string{ "concat", - "-1", fi1, - "-2", fi2, + "-i", fi1, + "-i", fi2, "-o", fo, "--delim", `\t\t`, }) @@ -191,13 +202,13 @@ func TestConcatCmd_columnCountUnmatch(t *testing.T) { rootCmd := newRootCmd() rootCmd.SetArgs([]string{ "concat", - "-1", fi1, - "-2", fi2, + "-i", fi1, + "-i", fi2, "-o", fo, }) err := rootCmd.Execute() - if err == nil || err.Error() != "number of columns does not match" { + if err == nil || err.Error() != "number of columns does not match (2)" { t.Fatal("failed test\n", err) } } @@ -222,13 +233,13 @@ func TestConcatCmd_columnNotFound(t *testing.T) { rootCmd := newRootCmd() rootCmd.SetArgs([]string{ "concat", - "-1", fi1, - "-2", fi2, + "-i", fi1, + "-i", fi2, "-o", fo, }) err := rootCmd.Execute() - if err == nil || err.Error() != "no column corresponding to the second CSV file: missing col2 in the CSV file" { + if err == nil || err.Error() != "no column corresponding in CSV file (2): missing col2 in the CSV file" { t.Fatal("failed test\n", err) } } @@ -250,8 +261,8 @@ func TestConcatCmd_firstEmpty(t *testing.T) { rootCmd := newRootCmd() rootCmd.SetArgs([]string{ "concat", - "-1", fi1, - "-2", fi2, + "-i", fi1, + "-i", fi2, "-o", fo, }) @@ -279,13 +290,13 @@ func TestConcatCmd_secondEmpty(t *testing.T) { rootCmd := newRootCmd() rootCmd.SetArgs([]string{ "concat", - "-1", fi1, - "-2", fi2, + "-i", fi1, + "-i", fi2, "-o", fo, }) err := rootCmd.Execute() - if err == nil || err.Error() != "failed to read the second CSV file: EOF" { + if err == nil || err.Error() != "failed to read CSV file (2): EOF" { t.Fatal("failed test\n", err) } } 
@@ -304,8 +315,8 @@ func TestConcatCmd_firstFileNotFound(t *testing.T) { rootCmd := newRootCmd() rootCmd.SetArgs([]string{ "concat", - "-1", fi1 + "____", // 存在しないファイル - "-2", fi2, + "-i", fi1 + "____", // 存在しないファイル + "-i", fi2, "-o", fo, }) @@ -334,8 +345,8 @@ func TestConcatCmd_secondFileNotFound(t *testing.T) { rootCmd := newRootCmd() rootCmd.SetArgs([]string{ "concat", - "-1", fi1, - "-2", fi2 + "____", // 存在しないファイル + "-i", fi1, + "-i", fi2 + "____", // 存在しないファイル "-o", fo, }) @@ -364,8 +375,8 @@ func TestConcatCmd_outputFileNotFound(t *testing.T) { rootCmd := newRootCmd() rootCmd.SetArgs([]string{ "concat", - "-1", fi1, - "-2", fi2, + "-i", fi1, + "-i", fi2, "-o", fo + "/aa", // 存在しないフォルダ })