From afcc7b3d15e2d402035ea2ca447d102a3f7468fb Mon Sep 17 00:00:00 2001 From: Will Date: Fri, 10 Sep 2021 02:17:31 -0700 Subject: [PATCH] Add `list` and `filter` commands (#227) * better name for detaching index command * add list and filter commands --- v2/cmd/car/car.go | 26 ++++++-- v2/cmd/car/{split.go => detach.go} | 4 +- v2/cmd/car/filter.go | 98 ++++++++++++++++++++++++++++++ v2/cmd/car/list.go | 53 ++++++++++++++++ 4 files changed, 175 insertions(+), 6 deletions(-) rename v2/cmd/car/{split.go => detach.go} (83%) create mode 100644 v2/cmd/car/filter.go create mode 100644 v2/cmd/car/list.go diff --git a/v2/cmd/car/car.go b/v2/cmd/car/car.go index 9f481cb4..df962938 100644 --- a/v2/cmd/car/car.go +++ b/v2/cmd/car/car.go @@ -28,10 +28,28 @@ func main() { }, }, { - Name: "split", - Aliases: []string{"s"}, - Usage: "Split an index to a detached file", - Action: SplitCar, + Name: "detach-index", + Usage: "Detach an index to a detached file", + Action: DetachCar, + }, + { + Name: "list", + Aliases: []string{"l"}, + Usage: "List the CIDs in a car", + Action: ListCar, + }, + { + Name: "filter", + Aliases: []string{"f"}, + Usage: "Filter the CIDs in a car", + Action: FilterCar, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "cid-file", + Usage: "A file to read CIDs from", + TakesFile: true, + }, + }, }, }, } diff --git a/v2/cmd/car/split.go b/v2/cmd/car/detach.go similarity index 83% rename from v2/cmd/car/split.go rename to v2/cmd/car/detach.go index 7733e19c..276d73b4 100644 --- a/v2/cmd/car/split.go +++ b/v2/cmd/car/detach.go @@ -9,8 +9,8 @@ import ( "github.com/urfave/cli/v2" ) -// SplitCar is a command to output the index part of a car. -func SplitCar(c *cli.Context) error { +// DetachCar is a command to output the index part of a car. 
+func DetachCar(c *cli.Context) error {
 	r, err := carv2.OpenReader(c.Args().Get(0))
 	if err != nil {
 		return err
diff --git a/v2/cmd/car/filter.go b/v2/cmd/car/filter.go
new file mode 100644
index 00000000..cb58b4c1
--- /dev/null
+++ b/v2/cmd/car/filter.go
@@ -0,0 +1,104 @@
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+
+	"github.com/ipfs/go-cid"
+	carv2 "github.com/ipld/go-car/v2"
+	"github.com/ipld/go-car/v2/blockstore"
+	icarv1 "github.com/ipld/go-car/v2/internal/carv1"
+	"github.com/urfave/cli/v2"
+)
+
+// FilterCar is a command to select a subset of a car by CID.
+//
+// The first argument is the input car and the second is the output car. The
+// CIDs to keep are read, one per line, from the --cid-file flag or from stdin.
+func FilterCar(c *cli.Context) error {
+	// Check the arguments up front so a missing name is reported as such.
+	if c.Args().Len() < 2 {
+		return fmt.Errorf("an output filename must be provided")
+	}
+	r, err := carv2.OpenReader(c.Args().Get(0))
+	if err != nil {
+		return err
+	}
+	defer r.Close()
+
+	// Get the set of CIDs from the file named by --cid-file, or from stdin.
+	// The flag is registered as "cid-file", so that is the name to look up.
+	inStream := os.Stdin
+	if c.IsSet("cid-file") {
+		inStream, err = os.Open(c.String("cid-file"))
+		if err != nil {
+			return err
+		}
+		defer inStream.Close()
+	}
+	cidMap, err := parseCIDS(inStream)
+	if err != nil {
+		return err
+	}
+	fmt.Printf("filtering to %d cids\n", len(cidMap))
+
+	// Open the output only once the CID list is known to be valid.
+	roots, err := r.Roots()
+	if err != nil {
+		return err
+	}
+	bs, err := blockstore.OpenReadWrite(c.Args().Get(1), roots)
+	if err != nil {
+		return err
+	}
+
+	rd, err := icarv1.NewCarReader(r.DataReader())
+	if err != nil {
+		return err
+	}
+
+	// Copy every block whose CID was requested into the output car.
+	for {
+		blk, err := rd.Next()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return err
+		}
+		if _, ok := cidMap[blk.Cid()]; ok {
+			if err := bs.Put(blk); err != nil {
+				return err
+			}
+		}
+	}
+	return bs.Finalize()
+}
+
+// parseCIDS reads one CID per line from r and returns them as a set. Blank
+// lines are skipped and duplicates are reported on stderr.
+func parseCIDS(r io.Reader) (map[cid.Cid]struct{}, error) {
+	cids := make(map[cid.Cid]struct{})
+	sc := bufio.NewScanner(r)
+	for sc.Scan() {
+		trimLine := strings.TrimSpace(sc.Text())
+		if len(trimLine) == 0 {
+			continue
+		}
+		c, err := cid.Parse(trimLine)
+		if err != nil {
+			return nil, fmt.Errorf("parsing cid %q: %w", trimLine, err)
+		}
+		if _, ok := cids[c]; ok {
+			fmt.Fprintf(os.Stderr, "duplicate cid: %s\n", c)
+		}
+		cids[c] = struct{}{}
+	}
+	if err := sc.Err(); err != nil {
+		return nil, err
+	}
+	return cids, nil
+}
diff --git a/v2/cmd/car/list.go b/v2/cmd/car/list.go
new file mode 100644
index 00000000..e9cf1f7e
--- /dev/null
+++ b/v2/cmd/car/list.go
@@ -0,0 +1,53 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	carv2 "github.com/ipld/go-car/v2"
+	"github.com/urfave/cli/v2"
+)
+
+// ListCar is a command to output the cids in a car.
+//
+// It reads the car named by the first argument (stdin if absent) and writes
+// one CID per line to the file named by the second argument (stdout if absent).
+func ListCar(c *cli.Context) error {
+	inStream := os.Stdin
+	var err error
+	if c.Args().Len() >= 1 {
+		inStream, err = os.Open(c.Args().First())
+		if err != nil {
+			return err
+		}
+		defer inStream.Close()
+	}
+	rd, err := carv2.NewBlockReader(inStream)
+	if err != nil {
+		return err
+	}
+
+	outStream := os.Stdout
+	if c.Args().Len() >= 2 {
+		outStream, err = os.Create(c.Args().Get(1))
+		if err != nil {
+			return err
+		}
+		// Only close a file opened here; never close the process's stdout.
+		defer outStream.Close()
+	}
+
+	for {
+		blk, err := rd.Next()
+		if err == io.EOF {
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+		if _, err := fmt.Fprintf(outStream, "%s\n", blk.Cid()); err != nil {
+			return err
+		}
+	}
+}