From 6f175c69145cd44e20d0a59ffbb9a9c698f3ee39 Mon Sep 17 00:00:00 2001
From: Konstantin Grachev
Date: Thu, 18 Jan 2024 21:50:27 +0300
Subject: [PATCH] sort-cli: init

---
 sort-cli/go.mod         |  14 +++
 sort-cli/go.sum         |  12 ++
 sort-cli/main.go        | 245 ++++++++++++++++++++++++++++++++++++++++
 sort-cli/main_test.go   |  80 +++++++++++++
 sort-cli/testdata/first |   7 ++
 5 files changed, 358 insertions(+)
 create mode 100644 sort-cli/go.mod
 create mode 100644 sort-cli/go.sum
 create mode 100644 sort-cli/main.go
 create mode 100644 sort-cli/main_test.go
 create mode 100644 sort-cli/testdata/first

diff --git a/sort-cli/go.mod b/sort-cli/go.mod
new file mode 100644
index 0000000..a815647
--- /dev/null
+++ b/sort-cli/go.mod
@@ -0,0 +1,14 @@
+module git.grachevko.ru/edu/cli/sort
+
+go 1.21.6
+
+require (
+	github.com/stretchr/testify v1.8.4
+	golang.org/x/exp v0.0.0-20240112132812-db7319d0e0e3
+)
+
+require (
+	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+)
diff --git a/sort-cli/go.sum b/sort-cli/go.sum
new file mode 100644
index 0000000..2939b8f
--- /dev/null
+++ b/sort-cli/go.sum
@@ -0,0 +1,12 @@
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+golang.org/x/exp v0.0.0-20240112132812-db7319d0e0e3 h1:hNQpMuAJe5CtcUqCXaWga3FHu+kQvCqcsoVaQgSV60o=
+golang.org/x/exp v0.0.0-20240112132812-db7319d0e0e3/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/sort-cli/main.go b/sort-cli/main.go
new file mode 100644
index 0000000..0bf900d
--- /dev/null
+++ b/sort-cli/main.go
@@ -0,0 +1,245 @@
+// Command sort is a minimal clone of sort(1): it reads lines from files or
+// standard input and prints them in sorted order.
+package main
+
+import (
+	"bytes"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"sort"
+	"strconv"
+	"strings"
+
+	"golang.org/x/exp/maps"
+)
+
+// Cfg holds the command-line options that control sorting.
+type Cfg struct {
+	Key     int  // 1-based column to sort by; 0 sorts by the whole line
+	Numeric bool // compare keys by their leading numeric value
+	Reverse bool // invert the sort order
+	Unique  bool // drop duplicate lines
+}
+
+func main() {
+	fmt.Print(run())
+}
+
+// run parses the command-line flags and returns the sorted output.
+func run() string {
+	var c Cfg
+
+	flag.IntVar(&c.Key, "k", 0, "sort via column")
+	flag.BoolVar(&c.Numeric, "n", false, "compare according to string numerical value")
+	flag.BoolVar(&c.Reverse, "r", false, "reverse the result of comparisons")
+	flag.BoolVar(&c.Unique, "u", false, "output only the first of an equal run")
+
+	flag.Parse()
+
+	// A negative key can never address a column.
+	if c.Key < 0 {
+		log.Fatalf("invalid key %d: column numbers start at 1", c.Key)
+	}
+
+	return doSort(&c, flag.Args())
+}
+
+// doSort loads the given sources, applies the options in cfg and returns the
+// sorted lines, each terminated by a newline.
+func doSort(cfg *Cfg, sources []string) string {
+	r := splitLines(load(sources), []byte("\n"), cfg.Key)
+
+	if cfg.Unique {
+		r = uniques(r)
+	}
+
+	rs := &Rows{s: r, cfg: cfg}
+	sort.Sort(rs)
+
+	return rs.String()
+}
+
+// Row is a single input line together with its sort column.
+type Row struct {
+	Line   []byte // the full line, without the trailing newline
+	Column []byte // the selected column; nil when sorting by the whole line
+}
+
+// Rows implements sort.Interface over a slice of rows according to cfg.
+type Rows struct {
+	s   []*Row
+	cfg *Cfg
+}
+
+// Len implements sort.Interface.
+func (p *Rows) Len() int { return len(p.s) }
+
+// Less implements sort.Interface. Rows whose keys compare equal are ordered by
+// their whole line, so the output does not depend on the input order.
+func (p *Rows) Less(i, j int) bool {
+	l, r := p.s[i], p.s[j]
+	if p.cfg.Reverse {
+		l, r = r, l
+	}
+
+	lk, rk := l.Line, r.Line
+	if p.cfg.Key > 0 {
+		lk, rk = l.Column, r.Column
+	}
+
+	if p.cfg.Numeric {
+		if ln, rn := numericValue(lk), numericValue(rk); ln != rn {
+			return ln < rn
+		}
+	} else if c := bytes.Compare(lk, rk); c != 0 {
+		// Bytewise order of UTF-8 text equals code point order, so there is
+		// no need to decode runes for every comparison.
+		return c < 0
+	}
+
+	return bytes.Compare(l.Line, r.Line) < 0
+}
+
+// Swap implements sort.Interface.
+func (p *Rows) Swap(i, j int) { p.s[i], p.s[j] = p.s[j], p.s[i] }
+
+// String returns all lines in their current order, each followed by "\n".
+func (p *Rows) String() string {
+	var n int
+	for _, row := range p.s {
+		n += len(row.Line) + len("\n")
+	}
+
+	var sb strings.Builder
+	sb.Grow(n)
+
+	for _, row := range p.s {
+		sb.Write(row.Line)
+		sb.WriteString("\n")
+	}
+
+	return sb.String()
+}
+
+// numericValue parses the leading decimal number of b, ignoring leading
+// blanks. A key that does not start with a number counts as 0.
+func numericValue(b []byte) float64 {
+	b = bytes.TrimLeft(b, " \t")
+
+	end := 0
+	if end < len(b) && b[end] == '-' {
+		end++
+	}
+	for end < len(b) && isDigit(b[end]) {
+		end++
+	}
+	if end < len(b) && b[end] == '.' {
+		end++
+		for end < len(b) && isDigit(b[end]) {
+			end++
+		}
+	}
+
+	f, err := strconv.ParseFloat(string(b[:end]), 64)
+	if err != nil {
+		return 0
+	}
+
+	return f
+}
+
+// isDigit reports whether c is an ASCII decimal digit.
+func isDigit(c byte) bool { return '0' <= c && c <= '9' }
+
+// load returns the concatenated contents of sources, reading standard input
+// when sources is empty or for every "-" entry. Each non-empty source is
+// newline-terminated so its last line cannot merge with the next source.
+func load(sources []string) []byte {
+	if len(sources) == 0 {
+		return loadStdin()
+	}
+
+	var b bytes.Buffer
+
+	for _, path := range sources {
+		var content []byte
+		if path == "-" {
+			content = loadStdin()
+		} else {
+			content = loadFile(path)
+		}
+
+		b.Write(content)
+
+		if n := len(content); n > 0 && content[n-1] != '\n' {
+			b.WriteByte('\n')
+		}
+	}
+
+	return b.Bytes()
+}
+
+// loadStdin reads standard input to EOF and exits the program on failure.
+func loadStdin() []byte {
+	b, err := io.ReadAll(os.Stdin)
+	if err != nil {
+		log.Fatalf("can't read stdin: %v", err)
+	}
+
+	return b
+}
+
+// loadFile reads the whole file at path and exits the program on failure.
+func loadFile(path string) []byte {
+	content, err := os.ReadFile(path)
+	if err != nil {
+		log.Fatalf("can't read file: %v", err)
+	}
+
+	return content
+}
+
+// splitLines splits b on sp into rows. When key is positive, the key-th
+// space-separated column of each line becomes its sort column and lines with
+// fewer columns are skipped.
+func splitLines(b, sp []byte, key int) []*Row {
+	if len(b) == 0 {
+		return nil
+	}
+
+	// A trailing separator ends the last line; it does not start an empty one.
+	b = bytes.TrimSuffix(b, sp)
+
+	r := make([]*Row, 0, bytes.Count(b, sp)+1)
+
+	for _, line := range bytes.Split(b, sp) {
+		var column []byte
+
+		if key > 0 {
+			fields := bytes.Split(line, []byte(" "))
+			if len(fields) < key {
+				continue // TODO is it error or not?
+			}
+
+			column = fields[key-1]
+		}
+
+		r = append(r, &Row{Line: line, Column: column})
+	}
+
+	return r
+}
+
+// uniques drops rows whose line duplicates that of another row. The order of
+// the returned rows is unspecified; callers are expected to sort them.
+func uniques(r []*Row) []*Row {
+	m := make(map[string]*Row, len(r))
+	for _, row := range r {
+		m[string(row.Line)] = row
+	}
+
+	return maps.Values(m)
+}
diff --git a/sort-cli/main_test.go b/sort-cli/main_test.go
new file mode 100644
index 0000000..9467053
--- /dev/null
+++ b/sort-cli/main_test.go
@@ -0,0 +1,80 @@
+package main
+
+import (
+	"flag"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestFlags(t *testing.T) {
+	// We manipulate the Args to set them up for the testcases
+	// after this test we restore the initial args
+	oldArgs := os.Args
+	defer func() { os.Args = oldArgs }()
+
+	cases := []struct {
+		Name           string
+		Args           []string
+		ExpectedExit   int
+		ExpectedOutput string
+	}{
+		{"No flags",
+			[]string{"testdata/first"},
+			0,
+			"alabama barcelona\nbarcelona california\ncalifornia denver\ncalifornia denver\nамур брянск\nбелгород волгоград\nволгоград геленджик",
+		},
+		{"Reverse",
+			[]string{"-r", "testdata/first"},
+			0,
+			"волгоград геленджик\nбелгород волгоград\nамур брянск\ncalifornia denver\ncalifornia denver\nbarcelona california\nalabama barcelona",
+		},
+		{"Unique",
+			[]string{"-u", "testdata/first"},
+			0,
+			"alabama barcelona\nbarcelona california\ncalifornia denver\nамур брянск\nбелгород волгоград\nволгоград геленджик",
+		},
+		{"Column 2",
+			[]string{"-k=2", "testdata/first"},
+			0,
+			"alabama barcelona\nbarcelona california\ncalifornia denver\ncalifornia denver\nамур брянск\nбелгород волгоград\nволгоград геленджик",
+		},
+	}
+
+	for _, tc := range cases {
+		tc := tc
+
+		t.Run(tc.Name, func(t *testing.T) {
+			// this call is required because otherwise flags panics, if args are set between flag.Parse calls
+			flag.CommandLine = flag.NewFlagSet(tc.Name, flag.ExitOnError)
+			// we need a value to set Args[0] to, cause flag begins parsing at Args[1]
+			os.Args = append([]string{tc.Name}, tc.Args...)
+
+			assert.Equal(t, tc.ExpectedOutput, strings.Trim(run(), "\n"))
+		})
+	}
+}
+
+func TestNumericSort(t *testing.T) {
+	rows := splitLines([]byte("10\n9\n2 apples\n-1\n"), []byte("\n"), 0)
+
+	rs := &Rows{s: rows, cfg: &Cfg{Numeric: true}}
+	sort.Sort(rs)
+
+	assert.Equal(t, "-1\n2 apples\n9\n10\n", rs.String())
+}
+
+func TestLoadTerminatesEverySource(t *testing.T) {
+	dir := t.TempDir()
+	first := filepath.Join(dir, "first")
+	second := filepath.Join(dir, "second")
+
+	assert.NoError(t, os.WriteFile(first, []byte("b"), 0o600))
+	assert.NoError(t, os.WriteFile(second, []byte("a\n"), 0o600))
+
+	assert.Equal(t, "b\na\n", string(load([]string{first, second})))
+}
diff --git a/sort-cli/testdata/first b/sort-cli/testdata/first
new file mode 100644
index 0000000..18f291a
--- /dev/null
+++ b/sort-cli/testdata/first
@@ -0,0 +1,7 @@
+волгоград геленджик
+амур брянск
+alabama barcelona
+california denver
+california denver
+barcelona california
+белгород волгоград