diff --git a/internal/chunk/obfuscate/ids.go b/internal/chunk/obfuscate/ids.go new file mode 100644 index 00000000..20382a1a --- /dev/null +++ b/internal/chunk/obfuscate/ids.go @@ -0,0 +1,52 @@ +package obfuscate + +import ( + "encoding/hex" + "strings" +) + +const ( + userPrefix = "UO" + chanPrefix = "CO" + filePrefix = "FO" + teamPrefix = "TO" +) + +// ID obfuscates an ID: it returns prefix followed by the first len(id)-1 +// upper-case hex digits of the hash of salt+id, or "" when id is empty. +func (o obfuscator) ID(prefix string, id string) string { + if id == "" { + return "" + } + h := o.hasher() + if _, err := h.Write([]byte(o.salt + id)); err != nil { + panic(err) + } + digest := strings.ToUpper(hex.EncodeToString(h.Sum(nil))) + // Clamp the cut so an id longer than the digest cannot slice out of range. + n := len(id) - 1 + if n > len(digest) { + n = len(digest) + } + return prefix + digest[:n] +} + +// UserID returns the obfuscated form of u, tagged with userPrefix. +func (o obfuscator) UserID(u string) string { + return o.ID(userPrefix, u) +} + +// ChannelID returns the obfuscated form of c, tagged with chanPrefix. +func (o obfuscator) ChannelID(c string) string { + return o.ID(chanPrefix, c) +} + +// FileID returns the obfuscated form of f, tagged with filePrefix. +func (o obfuscator) FileID(f string) string { + return o.ID(filePrefix, f) +} + +// TeamID returns the obfuscated form of g, tagged with teamPrefix. +func (o obfuscator) TeamID(g string) string { + return o.ID(teamPrefix, g) +} diff --git a/internal/chunk/obfuscate/obfuscate.go b/internal/chunk/obfuscate/obfuscate.go index 1e41121e..1bf7d27d 100644 --- a/internal/chunk/obfuscate/obfuscate.go +++ b/internal/chunk/obfuscate/obfuscate.go @@ -8,7 +8,6 @@ package obfuscate import ( "context" "crypto/sha256" - "encoding/hex" "encoding/json" "hash" "io" @@ -80,7 +79,7 @@ type obfuscator struct { } func (o obfuscator) Chunk(c *chunk.Chunk) { - c.ChannelID = o.ID("C", c.ChannelID) + c.ChannelID = o.ChannelID(c.ChannelID) switch c.Type { case chunk.CMessages: o.Messages(c.Messages...) 
@@ -105,7 +104,7 @@ func (o obfuscator) Messages(m ...slack.Message) { } } -const filePrefix = "https://files.slack.com/" +const fileURLPrefix = "https://files.slack.com/" func notNilFn(s string, fn func(string) string) string { if s != "" { @@ -119,14 +118,14 @@ func (o obfuscator) OneMessage(m *slack.Message) { return } m.ClientMsgID = notNilFn(m.ClientMsgID, func(s string) string { return randomUUID() }) - m.Team = o.ID("T", m.Team) - m.Channel = o.ID("C", m.Channel) - m.User = o.ID("U", m.User) + m.Team = o.TeamID(m.Team) + m.Channel = o.ChannelID(m.Channel) + m.User = o.UserID(m.User) if m.Text != "" { m.Text = randomString(len(m.Text)) } if m.Edited != nil { - m.Edited.User = o.ID("U", m.Edited.User) + m.Edited.User = o.UserID(m.Edited.User) } if len(m.Blocks.BlockSet) > 0 { m.Blocks.BlockSet = nil // too much hassle to obfuscate @@ -138,7 +137,7 @@ func (o obfuscator) OneMessage(m *slack.Message) { m.Attachments = nil // too much hassle to obfuscate } if m.ParentUserId != "" { - m.ParentUserId = o.ID("U", m.ParentUserId) + m.ParentUserId = o.UserID(m.ParentUserId) } for i := range m.Files { o.OneFile(&m.Files[i]) @@ -157,8 +156,8 @@ func (o obfuscator) OneFile(f *slack.File) { } ifnotnil := func(s string) string { if s != "" { - if strings.HasPrefix(s, filePrefix) { - s = filePrefix + randomString(len(s)-len(filePrefix)) + if strings.HasPrefix(s, fileURLPrefix) { + s = fileURLPrefix + randomString(len(s)-len(fileURLPrefix)) } else { s = randomString(len(s)) } @@ -198,8 +197,8 @@ func (o obfuscator) OneFile(f *slack.File) { f.OriginalW = 0 f.OriginalH = 0 f.InitialComment = slack.Comment{} - f.User = o.ID("U", f.User) - f.ID = o.ID("F", f.ID) + f.User = o.UserID(f.User) + f.ID = o.FileID(f.ID) } // randomString returns a random string of length n + random number [0,40). @@ -241,23 +240,11 @@ func randomUUID() string { return string(b) } -// ID obfuscates an ID. 
-func (o obfuscator) ID(prefix string, id string) string { - if id == "" { - return "" - } - h := o.hasher() - if _, err := h.Write([]byte(o.salt + id)); err != nil { - panic(err) - } - return prefix + strings.ToUpper(hex.EncodeToString(h.Sum(nil)))[:len(id)-1] -} - func (o obfuscator) Reactions(r []slack.ItemReaction) { for i := range r { r[i].Name = randomStringExact(len(r[i].Name)) for j := range r[i].Users { - r[i].Users[j] = o.ID("U", r[i].Users[j]) + r[i].Users[j] = o.UserID(r[i].Users[j]) } } } @@ -266,13 +253,18 @@ func (o obfuscator) Channel(c *slack.Channel) { if c == nil { return } - c.ID = o.ID("C", c.ID) - c.Creator = o.ID("U", c.Creator) + c.ID = o.ChannelID(c.ID) + c.Creator = o.UserID(c.Creator) c.Name = o.ID("", c.Name) c.NameNormalized = o.ID("", c.NameNormalized) + c.Purpose.Value = randomStringExact(len(c.Purpose.Value)) + c.Purpose.Creator = o.UserID(c.Purpose.Creator) + c.Topic.Value = randomStringExact(len(c.Topic.Value)) + c.Topic.Creator = o.UserID(c.Topic.Creator) + for i := range c.Members { - c.Members[i] = o.ID("U", c.Members[i]) + c.Members[i] = o.UserID(c.Members[i]) } } diff --git a/internal/chunk/obfuscate/obfuscate_test.go b/internal/chunk/obfuscate/obfuscate_test.go index f3bbb893..2876377e 100644 --- a/internal/chunk/obfuscate/obfuscate_test.go +++ b/internal/chunk/obfuscate/obfuscate_test.go @@ -141,10 +141,10 @@ func Test_obfuscator_OneMessage(t *testing.T) { ClientMsgID: "a29ab0f5-808b-bc8e-f22e-b4ac1a00fcd4", Type: "message", Channel: "", - User: "U8EEA06E1", + User: userPrefix + "8EEA06E1", Text: "9 LVwGabEN7FkWNmyD0HtOdvcYYvfHfF hVA6Nd1BtVOw52BH40tQ4xsZr1rbOE", Timestamp: "1645095505.023899", - Team: "TEBC93378", + Team: teamPrefix + "EBC93378", }, }, }, @@ -199,7 +199,7 @@ func Test_obfuscator_OneFile(t *testing.T) { f: fixtures.Load[*slack.File](fixtures.FileJPEG), }, wantFile: &slack.File{ - ID: "F8B5BAA15C4", + ID: filePrefix + "8B5BAA15C4", Created: 1638784624, Timestamp: 1638784624, Name: "N1CIe93m6sjyQtxxQ", @@ 
-207,7 +207,7 @@ func Test_obfuscator_OneFile(t *testing.T) { Mimetype: "image/jpeg", Filetype: "jpg", PrettyType: "JPEG", - User: "U8EEA06E1", + User: userPrefix + "8EEA06E1", Mode: "hosted", Size: 359002, URLPrivate: "https://files.slack.com/jXUJR9JT5pul5g8MDbK7E1ycTwBhzdJG9 LVwGabEN7FkWNmyD0HtOdvcYYvfHfF hVA6Nd1BtVOw52BH40tQ4xsZr1rbOEdndtLrooKH5L9GzLgWmmWfVTBKfSvym98qEQMYaWdLEKrJCEXzYB2bFiOLzhK", @@ -246,10 +246,12 @@ var testChan = &slack.Channel{ IsArchived: false, Creator: "U024BE7LH", Topic: slack.Topic{ - Value: "Fun times", + Value: "Fun times", + Creator: "U024BE7LV", }, Purpose: slack.Purpose{ - Value: "A place for non-work-related flimflam.", + Value: "A place for non-work-related flimflam.", + Creator: "U024BE7LH", }, Conversation: slack.Conversation{ ID: "C0G9QF9GW", @@ -287,15 +289,17 @@ func Test_obfuscator_Channel(t *testing.T) { GroupConversation: slack.GroupConversation{ Name: "55D55", IsArchived: false, - Creator: "UF209DFAC", + Creator: userPrefix + "F209DFAC", Topic: slack.Topic{ - Value: "GabEN7FkW", + Value: "GabEN7FkW", + Creator: userPrefix + "0077C5B4", }, Purpose: slack.Purpose{ - Value: "2jXUJR9JT5pul5g8MDbK7E1ycTwBhzdJG9 LVw", + Value: "2jXUJR9JT5pul5g8MDbK7E1ycTwBhzdJG9 LVw", + Creator: userPrefix + "F209DFAC", }, Conversation: slack.Conversation{ - ID: "C0250C11A", + ID: chanPrefix + "0250C11A", IsGroup: false, Created: 1449252882, }, diff --git a/internal/fixtures/assets/chunks.jsonl.gz b/internal/fixtures/assets/chunks.jsonl.gz index 962a5ac3..8c83967a 100644 Binary files a/internal/fixtures/assets/chunks.jsonl.gz and b/internal/fixtures/assets/chunks.jsonl.gz differ diff --git a/internal/fixtures/chunk.go b/internal/fixtures/chunk.go index 96acac50..a177341f 100644 --- a/internal/fixtures/chunk.go +++ b/internal/fixtures/chunk.go @@ -14,7 +14,7 @@ import ( //go:embed assets/chunks.jsonl.gz var chunksJsonlGz []byte -const ChunkFileChannelID = "C73D19AAE17" +const ChunkFileChannelID = "CO73D19AAE17" // chunksJSONL returns a reader 
for the b []byte, which assumed to be a // gzip-compressed bytes slice. It panics on error.