Skip to content

Commit

Permalink
split: add flag --ignore-case. #462
Browse files Browse the repository at this point in the history
  • Loading branch information
shenwei356 committed Apr 29, 2024
1 parent 9b1de66 commit f8ab09c
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
- Fix a bug introduced in v2.7.0: when more than one pair of primers is given, only the last one is used. [#457](https://github.com/shenwei356/seqkit/issues/457)
- `seqkit translate`:
- Add option `-e/--skip-translate-errors` to skip translation errors and output an empty sequence instead. [#458](https://github.com/shenwei356/seqkit/pull/458)
- `seqkit split`:
- Add flag `--ignore-case` for `-i/--by-id`. [#462](https://github.com/shenwei356/seqkit/issues/462)
- [SeqKit v2.8.1](https://github.com/shenwei356/seqkit/releases/tag/v2.8.1) - 2024-04-07
[![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/seqkit/v2.8.1/total.svg)](https://github.com/shenwei356/seqkit/releases/tag/v2.8.1)
- `seqkit sana`:
Expand Down
1 change: 1 addition & 0 deletions doc/docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -2588,6 +2588,7 @@ Flags:
-e, --extension string set output file extension, e.g., ".gz", ".xz", or ".zst"
-f, --force overwrite output directory
-h, --help help for split
--ignore-case ignore case when using -i/--by-id
-k, --keep-temp keep temporary FASTA and .fai file when using 2-pass mode
-O, --out-dir string output directory (default value is $infile.split)
-2, --two-pass two-pass mode read files twice to lower memory usage. (only for FASTA
Expand Down
8 changes: 8 additions & 0 deletions seqkit/cmd/split.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ Examples:
part := getFlagNonNegativeInt(cmd, "by-part")

byID := getFlagBool(cmd, "by-id")
ignoreCase := getFlagBool(cmd, "ignore-case")
region := getFlagString(cmd, "by-region")
twoPass := getFlagBool(cmd, "two-pass")
updateFaidx := getFlagBool(cmd, "update-faidx")
Expand Down Expand Up @@ -619,6 +620,9 @@ Examples:
renameFileExt = false
}
id = string(record.ID)
if ignoreCase {
id = strings.ToLower(id)
}
if _, ok := recordsByID[id]; !ok {
recordsByID[id] = []*fastx.Record{}
}
Expand Down Expand Up @@ -718,6 +722,9 @@ Examples:
idsMap := make(map[string][]string)
for _, ID := range IDs {
id := string(fastx.ParseHeadID(idRe, []byte(ID)))
if ignoreCase {
id = strings.ToLower(id)
}
if _, ok := idsMap[id]; !ok {
idsMap[id] = []string{}
}
Expand Down Expand Up @@ -1020,6 +1027,7 @@ func init() {
splitCmd.Flags().IntP("by-size", "s", 0, "split sequences into multi parts with N sequences")
splitCmd.Flags().IntP("by-part", "p", 0, "split sequences into N parts")
splitCmd.Flags().BoolP("by-id", "i", false, "split squences according to sequence ID")
splitCmd.Flags().BoolP("ignore-case", "", false, "ignore case when using -i/--by-id")
splitCmd.Flags().StringP("by-region", "r", "", "split squences according to subsequence of given region. "+
`e.g 1:12 for first 12 bases, -12:-1 for last 12 bases. type "seqkit split -h" for more examples`)
splitCmd.Flags().BoolP("two-pass", "2", false, "two-pass mode read files twice to lower memory usage. (only for FASTA format)")
Expand Down

0 comments on commit f8ab09c

Please sign in to comment.