Skip to content

Commit

Permalink
Merge pull request #197 from rusq/cli-remake-processor
Browse files Browse the repository at this point in the history
Chunks and processors
  • Loading branch information
rusq authored Mar 6, 2023
2 parents d74c925 + 2f668c8 commit f20b8c3
Show file tree
Hide file tree
Showing 90 changed files with 6,130 additions and 1,116 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# IDE
.idea
.vscode
.gonvim

#OS junk
.DS_Store
Expand Down Expand Up @@ -45,3 +46,9 @@ cmd/sdconv/sdconv
dist/
!.goreleaser.yaml
!schema.json
*.jsonl
*.state

# sundry junk used for testing and other fuckery
/tmp
*.dot
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,7 @@ callvis:

goreleaser:
goreleaser check
goreleaser release --snapshot --rm-dist
goreleaser release --snapshot --clean

tags:
gotags -R *.go > $@
7 changes: 7 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,13 @@ FAQ
Python application and runs in a browser. For the generic dump files, see
`examples`_ directory for some python and shell examples.

:Q: **My Slack Workspace is on the Free plan. Can I get data older than
90 days?**

:A: No, unfortunately you can't. Slack doesn't allow exporting data older
than 90 days from free workspaces: the API does not return any data older than
90 days for workspaces on the Free plan.

Thank you
=========
Big thanks to all contributors, who submitted a pull request, reported a bug,
Expand Down
33 changes: 5 additions & 28 deletions auth/browser/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@ import (
"errors"
"fmt"
"net/http"
"net/url"
"os"
"path/filepath"
"regexp"
"runtime"
"runtime/trace"
"strings"
Expand All @@ -29,9 +27,7 @@ type Client struct {

var Logger logger.Interface = logger.Default

var (
installFn = playwright.Install
)
var installFn = playwright.Install

// New create new browser based client.
func New(workspace string, opts ...Option) (*Client, error) {
Expand Down Expand Up @@ -118,7 +114,7 @@ func (cl *Client) Authenticate(ctx context.Context) (string, []*http.Cookie, err
return "", nil, err
}

token, err := extractToken(r.URL())
token, err := extractToken(r)
if err != nil {
return "", nil, err
}
Expand All @@ -135,7 +131,7 @@ func (cl *Client) Authenticate(ctx context.Context) (string, []*http.Cookie, err
}

func (cl *Client) withBrowserGuard(ctx context.Context, fn func()) error {
var done = make(chan struct{})
done := make(chan struct{})
go func() {
defer close(done)
fn()
Expand All @@ -150,27 +146,8 @@ func (cl *Client) withBrowserGuard(ctx context.Context, fn func()) error {
return nil
}

// tokenRE is the regexp that matches a valid Slack Client token.
var tokenRE = regexp.MustCompile(`xoxc-[0-9]+-[0-9]+-[0-9]+-[0-9a-z]{64}`)

func extractToken(uri string) (string, error) {
p, err := url.Parse(strings.TrimSpace(uri))
if err != nil {
return "", err
}
q := p.Query()
token := q.Get("token")
if token == "" {
return "", errors.New("token not found")
}
if !tokenRE.MatchString(token) {
return "", errors.New("invalid token value")
}
return token, nil
}

func convertCookies(pwc []playwright.Cookie) []*http.Cookie {
var ret = make([]*http.Cookie, 0, len(pwc))
ret := make([]*http.Cookie, 0, len(pwc))
for _, p := range pwc {
ret = append(ret, &http.Cookie{
Name: p.Name,
Expand Down Expand Up @@ -268,7 +245,7 @@ func pwIsKnownProblem(path string) error {
}
// check if the file is executable, and if yes, return an error, because
// we wouldn't know what to do.
if fi.Mode()&0111 != 0 {
if fi.Mode()&0o111 != 0 {
return errUnknownProblem
}
return nil
Expand Down
31 changes: 0 additions & 31 deletions auth/browser/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,37 +13,6 @@ import (
"github.com/playwright-community/playwright-go"
)

func Test_extractToken(t *testing.T) {
type args struct {
uri string
}
tests := []struct {
name string
args args
want string
wantErr bool
}{
{
"ok",
args{"https://ora600.slack.com/api/api.features?_x_id=noversion-1651817410.129&token=xoxc-610187951300-604451271234-3473161557912-4c426dd426a45208707725b710302b32dda0ab002b80ccd8c4c8ac9971a11558&platform=sonic&_x_should_cache=false&_x_allow_cached=true&_x_team_id=THY5HTZ8U&_x_gantry=true&fp=7c\n"},
"xoxc-610187951300-604451271234-3473161557912-4c426dd426a45208707725b710302b32dda0ab002b80ccd8c4c8ac9971a11558",
false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := extractToken(tt.args.uri)
if (err != nil) != tt.wantErr {
t.Errorf("extractToken() error = %v, wantErr %v", err, tt.wantErr)
return
}
if got != tt.want {
t.Errorf("extractToken() got = %v, want %v", got, tt.want)
}
})
}
}

func Test_float2time(t *testing.T) {
type args struct {
v float64
Expand Down
101 changes: 101 additions & 0 deletions auth/browser/extractors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package browser

import (
	"errors"
	"mime"
	"mime/multipart"
	"net/http"
	"net/url"
	"regexp"
	"strings"

	"github.com/playwright-community/playwright-go"
)

//go:generate mockgen -package browser -destination playwright_test.go github.com/playwright-community/playwright-go Request

// tokenRE is the regexp that matches a valid Slack Client token.
//
// NOTE(review): the pattern carries no ^/$ anchors, so MatchString reports a
// match for any string that merely contains a token-shaped substring —
// confirm that this is intended.
var tokenRE = regexp.MustCompile(`xoxc-[0-9]+-[0-9]+-[0-9]+-[0-9a-z]{64}`)

// maxMultipartMem is the maxMemory argument handed to ReadForm when a
// multipart request body is parsed (64 KiB).
const maxMultipartMem = 65536

// Sentinel errors reported by the token extractors.
var (
// ErrNoToken reports that the request carried no token.
ErrNoToken = errors.New("no token found")
// ErrInvalidTokenValue reports that a token was present, but did not match
// tokenRE.
ErrInvalidTokenValue = errors.New("invalid token value")
// ErrInvalidContentType reports that the Content-Type header is absent, has
// more than one value, or is not a multipart/form-data type with a boundary.
ErrInvalidContentType = errors.New("invalid content-type header")
)

// extractToken pulls the Slack client token out of the intercepted request.
// GET requests are handed to extractTokenGet together with the request URL,
// POST requests are handed to extractTokenPost; every other method is
// rejected with an error.
func extractToken(r playwright.Request) (string, error) {
	switch {
	case r.Method() == http.MethodGet:
		return extractTokenGet(r.URL())
	case r.Method() == http.MethodPost:
		return extractTokenPost(r)
	default:
		return "", errors.New("invalid request method")
	}
}

// extractTokenGet reads the "token" query parameter of uri and validates it.
// Whitespace surrounding uri is trimmed before parsing.
//
// It returns the URL parsing error if uri is malformed, ErrNoToken if the
// parameter is absent or empty, and ErrInvalidTokenValue if the value does
// not match tokenRE.
func extractTokenGet(uri string) (string, error) {
	parsed, err := url.Parse(strings.TrimSpace(uri))
	if err != nil {
		return "", err
	}
	switch token := parsed.Query().Get("token"); {
	case token == "":
		return "", ErrNoToken
	case !tokenRE.MatchString(token):
		return "", ErrInvalidTokenValue
	default:
		return token, nil
	}
}

// extractTokenPost extracts the token from the multipart body of a POST
// request. The multipart boundary is taken from the request headers first,
// then the post data is fetched and parsed with that boundary. The first
// error encountered along the way is returned unchanged.
func extractTokenPost(r playwright.Request) (string, error) {
	sep, err := boundary(r)
	if err != nil {
		return "", err
	}
	body, err := r.PostData()
	if err != nil {
		return "", err
	}
	return tokenFromMultipart(body, sep)
}

// tokenFromMultipart parses s as a multipart/form-data body delimited by
// boundary and returns the value of its "token" field.
//
// It returns ErrNoToken if the form has no "token" field, and
// ErrInvalidTokenValue if the field occurs more than once or its value does
// not match tokenRE, so that callers can use errors.Is exactly as they do for
// the query string path. Errors of the multipart parser are returned as is.
func tokenFromMultipart(s string, boundary string) (string, error) {
	mp := multipart.NewReader(strings.NewReader(s), boundary)
	form, err := mp.ReadForm(maxMultipartMem)
	if err != nil {
		return "", err
	}
	// ReadForm may spill file parts into temporary files. Removal is a
	// best-effort clean-up, its error has no bearing on the extracted token.
	defer form.RemoveAll()

	tok := form.Value["token"]
	if len(tok) == 0 {
		return "", ErrNoToken
	}
	if len(tok) != 1 || !tokenRE.MatchString(tok[0]) {
		return "", ErrInvalidTokenValue
	}
	return tok[0], nil
}

// boundary returns the multipart boundary announced in the Content-Type
// header of the request.
//
// The header is parsed with mime.ParseMediaType rather than split on a
// literal "; boundary=" separator, so that quoted boundaries, optional
// whitespace, differences in letter case and additional parameters are all
// handled the way RFC 2045 media types require.
//
// It returns the error of r.HeaderValues if the headers cannot be read, and
// ErrInvalidContentType if the header is absent, has more than one value,
// cannot be parsed, is not multipart/form-data, or carries no boundary.
func boundary(r playwright.Request) (string, error) {
	values, err := r.HeaderValues("Content-Type")
	if err != nil {
		return "", err
	}
	if len(values) != 1 {
		return "", ErrInvalidContentType
	}
	mediaType, params, err := mime.ParseMediaType(values[0])
	if err != nil || mediaType != "multipart/form-data" {
		return "", ErrInvalidContentType
	}
	// An empty boundary can never delimit a body, treat it as a bad header.
	b := params["boundary"]
	if b == "" {
		return "", ErrInvalidContentType
	}
	return b, nil
}
134 changes: 134 additions & 0 deletions auth/browser/extractors_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package browser

import (
"errors"
"testing"

gomock "github.com/golang/mock/gomock"
)

// testMultipart is a sample multipart/form-data request body that carries the
// token, platform, _x_should_cache, _x_allow_cached, _x_team_id, _x_gantry and
// _x_sonic fields. Its parts are delimited by testBoundary.
const testMultipart = "-----------------------------37168696061856579082739228613\r\nContent-Disposition: form-data; name=\"token\"\r\n\r\nxoxc-888888888888-888888888888-8888888888888-fffffffffffffffa915fe069d70a8ad81743b0ec4ee9c81540af43f5e143264b\r\n-----------------------------37168696061856579082739228613\r\nContent-Disposition: form-data; name=\"platform\"\r\n\r\nsonic\r\n-----------------------------37168696061856579082739228613\r\nContent-Disposition: form-data; name=\"_x_should_cache\"\r\n\r\nfalse\r\n-----------------------------37168696061856579082739228613\r\nContent-Disposition: form-data; name=\"_x_allow_cached\"\r\n\r\ntrue\r\n-----------------------------37168696061856579082739228613\r\nContent-Disposition: form-data; name=\"_x_team_id\"\r\n\r\nTFCSDNRL5\r\n-----------------------------37168696061856579082739228613\r\nContent-Disposition: form-data; name=\"_x_gantry\"\r\n\r\ntrue\r\n-----------------------------37168696061856579082739228613\r\nContent-Disposition: form-data; name=\"_x_sonic\"\r\n\r\ntrue\r\n-----------------------------37168696061856579082739228613--\r\n"

// testHdrValues is the Content-Type header value list that announces
// testBoundary for a multipart/form-data body.
var testHdrValues = []string{
"multipart/form-data; boundary=---------------------------37168696061856579082739228613",
}

// testBoundary is the boundary parameter carried by testHdrValues.
const testBoundary = "---------------------------37168696061856579082739228613"

// Test_extractTokenGet verifies that extractTokenGet returns the token carried
// in the query string, and that it fails for URIs that are malformed or that
// carry no well-formed token.
func Test_extractTokenGet(t *testing.T) {
	type args struct {
		uri string
	}
	tests := []struct {
		name    string
		args    args
		want    string
		wantErr bool
	}{
		{
			// The trailing newline exercises the whitespace trimming.
			"ok",
			args{"https://ora600.slack.com/api/api.features?_x_id=noversion-1651817410.129&token=xoxc-610187951300-604451271234-3473161557912-4c426dd426a45208707725b710302b32dda0ab002b80ccd8c4c8ac9971a11558&platform=sonic&_x_should_cache=false&_x_allow_cached=true&_x_team_id=THY5HTZ8U&_x_gantry=true&fp=7c\n"},
			"xoxc-610187951300-604451271234-3473161557912-4c426dd426a45208707725b710302b32dda0ab002b80ccd8c4c8ac9971a11558",
			false,
		},
		{
			"no token",
			args{"https://ora600.slack.com/api/api.features?platform=sonic"},
			"",
			true,
		},
		{
			"invalid token",
			args{"https://ora600.slack.com/api/api.features?token=xoxc-invalid"},
			"",
			true,
		},
		{
			"malformed uri",
			args{"://"},
			"",
			true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := extractTokenGet(tt.args.uri)
			if (err != nil) != tt.wantErr {
				t.Errorf("extractTokenGet() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if got != tt.want {
				t.Errorf("extractTokenGet() got = %v, want %v", got, tt.want)
			}
		})
	}
}

// Test_tokenFromMultipart verifies that tokenFromMultipart returns the token
// field of a well-formed multipart body, and that it fails when the boundary
// does not match or the body is malformed or empty.
func Test_tokenFromMultipart(t *testing.T) {
	type args struct {
		s        string
		boundary string
	}
	tests := []struct {
		name    string
		args    args
		want    string
		wantErr bool
	}{
		{"ok", args{testMultipart, testBoundary}, "xoxc-888888888888-888888888888-8888888888888-fffffffffffffffa915fe069d70a8ad81743b0ec4ee9c81540af43f5e143264b", false},
		{"bad boundary", args{testMultipart, "bad"}, "", true},
		{"bad multipart", args{"bad", testBoundary}, "", true},
		{"empty", args{"", testBoundary}, "", true},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := tokenFromMultipart(tt.args.s, tt.args.boundary)
			if (err != nil) != tt.wantErr {
				t.Errorf("tokenFromMultipart() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if got != tt.want {
				t.Errorf("tokenFromMultipart() = %v, want %v", got, tt.want)
			}
		})
	}
}

// Test_boundary drives boundary with a mocked request whose Content-Type
// header lookup is scripted per case: a valid multipart header, a missing
// header, a header that is not multipart, and a failing lookup.
func Test_boundary(t *testing.T) {
	type testCase struct {
		name    string
		expect  func(r *MockRequest)
		want    string
		wantErr bool
	}
	cases := []testCase{
		{
			name: "ok",
			expect: func(r *MockRequest) {
				r.EXPECT().HeaderValues("Content-Type").Return(testHdrValues, nil)
			},
			want:    testBoundary,
			wantErr: false,
		},
		{
			name: "no header",
			expect: func(r *MockRequest) {
				r.EXPECT().HeaderValues("Content-Type").Return(nil, nil)
			},
			want:    "",
			wantErr: true,
		},
		{
			name: "bad header",
			expect: func(r *MockRequest) {
				r.EXPECT().HeaderValues("Content-Type").Return([]string{"bad"}, nil)
			},
			want:    "",
			wantErr: true,
		},
		{
			name: "error",
			expect: func(r *MockRequest) {
				r.EXPECT().HeaderValues("Content-Type").Return(nil, errors.New("bad"))
			},
			want:    "",
			wantErr: true,
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			req := NewMockRequest(gomock.NewController(t))
			tc.expect(req)

			got, err := boundary(req)
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("boundary() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if got != tc.want {
				t.Errorf("boundary() = %v, want %v", got, tc.want)
			}
		})
	}
}
Loading

0 comments on commit f20b8c3

Please sign in to comment.