Skip to content

Commit

Permalink
Change to allow concat to merge 3 or more CSV files
Browse files Browse the repository at this point in the history
  • Loading branch information
onozaty committed May 20, 2023
1 parent 0315aaf commit 6c1908a
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 89 deletions.
11 changes: 5 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -238,18 +238,17 @@ Check the column names and concat them into the same column.
### Usage

```
csvt concat -1 INPUT1 -2 INPUT2 -o OUTPUT
csvt concat -i INPUT1 -i INPUT2 [-i INPUT3 ...] -o OUTPUT
```

```
Usage:
csvt concat [flags]
Flags:
-1, --first string First CSV file path.
-2, --second string Second CSV file path.
-o, --output string Output CSV file path.
-h, --help help for concat
-i, --input stringArray Input CSV files path.
-o, --output string Output CSV file path.
-h, --help help for concat
```

### Example
Expand All @@ -273,7 +272,7 @@ name4,4
Concat `input1.csv` and `input2.csv`.

```
$ csvt concat -1 input1.csv -2 input2.csv -o output.csv
$ csvt concat -i input1.csv -i input2.csv -o output.csv
```

The contents of the created `output.csv`.
Expand Down
3 changes: 1 addition & 2 deletions cmd/common_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package cmd

import (
"io/ioutil"
"os"
"path/filepath"
"strings"
Expand Down Expand Up @@ -60,7 +59,7 @@ func joinRows(rows ...string) string {

func readDir(t *testing.T, dir string) map[string][]byte {

files, err := ioutil.ReadDir(dir)
files, err := os.ReadDir(dir)
if err != nil {
t.Fatal("read dir failed\n", err)
}
Expand Down
115 changes: 59 additions & 56 deletions cmd/concat.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,91 +21,68 @@ func newConcatCmd() *cobra.Command {
return err
}

firstPath, _ := cmd.Flags().GetString("first")
secondPath, _ := cmd.Flags().GetString("second")
inputPaths, _ := cmd.Flags().GetStringArray("input")
outputPath, _ := cmd.Flags().GetString("output")

// 引数の解析に成功した時点で、エラーが起きてもUsageは表示しない
cmd.SilenceUsage = true

return runConcat(format, firstPath, secondPath, outputPath)
return runConcat(format, inputPaths, outputPath)
},
}

concatCmd.Flags().StringP("first", "1", "", "First CSV file path.")
concatCmd.MarkFlagRequired("first")
concatCmd.Flags().StringP("second", "2", "", "Second CSV file path.")
concatCmd.MarkFlagRequired("second")
concatCmd.Flags().StringArrayP("input", "i", []string{}, "Input CSV files path.")
concatCmd.MarkFlagRequired("input")
concatCmd.Flags().StringP("output", "o", "", "Output CSV file path.")
concatCmd.MarkFlagRequired("output")

return concatCmd
}

func runConcat(format csv.Format, firstPath string, secondPath string, outputPath string) error {
func runConcat(format csv.Format, inputPaths []string, outputPath string) error {

firstReader, firstClose, err := setupInput(firstPath, format)
if err != nil {
return err
}
defer firstClose()
readers := []csv.CsvReader{}

secondReader, secondClose, err := setupInput(secondPath, format)
if err != nil {
return err
for _, inputPath := range inputPaths {
reader, inputClose, err := setupInput(inputPath, format)
if err != nil {
return err
}
defer inputClose()

readers = append(readers, reader)
}
defer secondClose()

writer, outputClose, err := setupOutput(outputPath, format)
if err != nil {
return err
}
defer outputClose()

err = concat(firstReader, secondReader, writer)
err = concat(readers, writer)
if err != nil {
return err
}

return writer.Flush()
}

func concat(first csv.CsvReader, second csv.CsvReader, writer csv.CsvWriter) error {
func concat(readers []csv.CsvReader, writer csv.CsvWriter) error {

firstColumnNames, err := first.Read()
firstReader := readers[0]
firstColumnNames, err := firstReader.Read()
if err != nil {
return errors.Wrap(err, "failed to read the first CSV file")
}

secondColumnNames, err := second.Read()
if err != nil {
return errors.Wrap(err, "failed to read the second CSV file")
}

if len(firstColumnNames) != len(secondColumnNames) {
return fmt.Errorf("number of columns does not match")
}

// 1つ目のCSVのカラム名と2つ目のCSVのカラム名のマッピングを作成
secondColumnIndexes := []int{}
for _, firstColumnName := range firstColumnNames {

secondColumnIndex, err := getTargetColumnIndex(secondColumnNames, firstColumnName)
if err != nil {
return errors.Wrap(err, "no column corresponding to the second CSV file")
}

secondColumnIndexes = append(secondColumnIndexes, secondColumnIndex)
}

// 1つ目の書き込み
err = writer.Write(firstColumnNames)
if err != nil {
return err
}

for {
row, err := first.Read()
row, err := firstReader.Read()
if err == io.EOF {
break
}
Expand All @@ -119,25 +96,51 @@ func concat(first csv.CsvReader, second csv.CsvReader, writer csv.CsvWriter) err
}
}

// 2つ目の書き込み
for {
row, err := second.Read()
if err == io.EOF {
break
}
// 2つ目以降
count := 1
for _, reader := range readers[1:] {
count++

columnNames, err := reader.Read()
if err != nil {
return errors.Wrap(err, "failed to read the second CSV file")
return errors.Wrapf(err, "failed to read CSV file (%d)", count)
}

// 1つ目のCSVに合わせてカラム入れ替え
swapedRow := []string{}
for _, secondColumnIndex := range secondColumnIndexes {
swapedRow = append(swapedRow, row[secondColumnIndex])
if len(firstColumnNames) != len(columnNames) {
return fmt.Errorf("number of columns does not match (%d)", count)
}

err = writer.Write(swapedRow)
if err != nil {
return err
// 1つ目のCSVのカラム名とのカラム名のマッピングを作成
columnIndexes := []int{}
for _, firstColumnName := range firstColumnNames {

columnIndex, err := getTargetColumnIndex(columnNames, firstColumnName)
if err != nil {
return errors.Wrapf(err, "no column corresponding in CSV file (%d)", count)
}

columnIndexes = append(columnIndexes, columnIndex)
}

for {
row, err := reader.Read()
if err == io.EOF {
break
}
if err != nil {
return errors.Wrapf(err, "failed to read CSV file (%d)", count)
}

// 1つ目のCSVに合わせてカラム入れ替え
swapedRow := []string{}
for _, columnIndex := range columnIndexes {
swapedRow = append(swapedRow, row[columnIndex])
}

err = writer.Write(swapedRow)
if err != nil {
return err
}
}
}

Expand Down
61 changes: 36 additions & 25 deletions cmd/concat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ func TestConcatCmd(t *testing.T) {
rootCmd := newRootCmd()
rootCmd.SetArgs([]string{
"concat",
"-1", fi1,
"-2", fi2,
"-i", fi1,
"-i", fi2,
"-o", fo,
})

Expand Down Expand Up @@ -70,14 +70,23 @@ b2,b3,4
fi2 := createTempFile(t, s2)
defer os.Remove(fi2)

s3 := `col3,col1,col2
-3,5,-2
+3,6,+2
`

fi3 := createTempFile(t, s3)
defer os.Remove(fi3)

fo := createTempFile(t, "")
defer os.Remove(fo)

rootCmd := newRootCmd()
rootCmd.SetArgs([]string{
"concat",
"-1", fi1,
"-2", fi2,
"-i", fi1,
"-i", fi2,
"-i", fi3,
"-o", fo,
})

Expand All @@ -94,6 +103,8 @@ b2,b3,4
"2,y2,y3",
"3,a2,a3",
"4,b2,b3",
"5,-2,-3",
"6,+2,+3",
)

if result != expect {
Expand Down Expand Up @@ -121,8 +132,8 @@ a b
rootCmd := newRootCmd()
rootCmd.SetArgs([]string{
"concat",
"-1", fi1,
"-2", fi2,
"-i", fi1,
"-i", fi2,
"-o", fo,
"--delim", `\t`,
})
Expand Down Expand Up @@ -159,8 +170,8 @@ func TestConcatCmd_invalidFormat(t *testing.T) {
rootCmd := newRootCmd()
rootCmd.SetArgs([]string{
"concat",
"-1", fi1,
"-2", fi2,
"-i", fi1,
"-i", fi2,
"-o", fo,
"--delim", `\t\t`,
})
Expand Down Expand Up @@ -191,13 +202,13 @@ func TestConcatCmd_columnCountUnmatch(t *testing.T) {
rootCmd := newRootCmd()
rootCmd.SetArgs([]string{
"concat",
"-1", fi1,
"-2", fi2,
"-i", fi1,
"-i", fi2,
"-o", fo,
})

err := rootCmd.Execute()
if err == nil || err.Error() != "number of columns does not match" {
if err == nil || err.Error() != "number of columns does not match (2)" {
t.Fatal("failed test\n", err)
}
}
Expand All @@ -222,13 +233,13 @@ func TestConcatCmd_columnNotFound(t *testing.T) {
rootCmd := newRootCmd()
rootCmd.SetArgs([]string{
"concat",
"-1", fi1,
"-2", fi2,
"-i", fi1,
"-i", fi2,
"-o", fo,
})

err := rootCmd.Execute()
if err == nil || err.Error() != "no column corresponding to the second CSV file: missing col2 in the CSV file" {
if err == nil || err.Error() != "no column corresponding in CSV file (2): missing col2 in the CSV file" {
t.Fatal("failed test\n", err)
}
}
Expand All @@ -250,8 +261,8 @@ func TestConcatCmd_firstEmpty(t *testing.T) {
rootCmd := newRootCmd()
rootCmd.SetArgs([]string{
"concat",
"-1", fi1,
"-2", fi2,
"-i", fi1,
"-i", fi2,
"-o", fo,
})

Expand Down Expand Up @@ -279,13 +290,13 @@ func TestConcatCmd_secondEmpty(t *testing.T) {
rootCmd := newRootCmd()
rootCmd.SetArgs([]string{
"concat",
"-1", fi1,
"-2", fi2,
"-i", fi1,
"-i", fi2,
"-o", fo,
})

err := rootCmd.Execute()
if err == nil || err.Error() != "failed to read the second CSV file: EOF" {
if err == nil || err.Error() != "failed to read CSV file (2): EOF" {
t.Fatal("failed test\n", err)
}
}
Expand All @@ -304,8 +315,8 @@ func TestConcatCmd_firstFileNotFound(t *testing.T) {
rootCmd := newRootCmd()
rootCmd.SetArgs([]string{
"concat",
"-1", fi1 + "____", // 存在しないファイル
"-2", fi2,
"-i", fi1 + "____", // 存在しないファイル
"-i", fi2,
"-o", fo,
})

Expand Down Expand Up @@ -334,8 +345,8 @@ func TestConcatCmd_secondFileNotFound(t *testing.T) {
rootCmd := newRootCmd()
rootCmd.SetArgs([]string{
"concat",
"-1", fi1,
"-2", fi2 + "____", // 存在しないファイル
"-i", fi1,
"-i", fi2 + "____", // 存在しないファイル
"-o", fo,
})

Expand Down Expand Up @@ -364,8 +375,8 @@ func TestConcatCmd_outputFileNotFound(t *testing.T) {
rootCmd := newRootCmd()
rootCmd.SetArgs([]string{
"concat",
"-1", fi1,
"-2", fi2,
"-i", fi1,
"-i", fi2,
"-o", fo + "/aa", // 存在しないフォルダ
})

Expand Down

0 comments on commit 6c1908a

Please sign in to comment.