Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add profile. #53

Merged
merged 10 commits into from
Aug 18, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/charmbracelet/glamour v0.3.0
github.com/efficientgo/tools/core v0.0.0-20210609125236-d73259166f20
github.com/efficientgo/tools/extkingpin v0.0.0-20210609125236-d73259166f20
github.com/felixge/fgprof v0.9.1
github.com/go-kit/kit v0.10.0
github.com/gobwas/glob v0.2.3
github.com/gocolly/colly/v2 v2.1.1-0.20201013153555-8252c346cfb0
Expand All @@ -20,11 +21,12 @@ require (
github.com/muesli/reflow v0.2.1-0.20210115123740-9e1d0d53df68 // indirect
github.com/oklog/run v1.1.0
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.11.0
github.com/prometheus/common v0.30.0
github.com/sergi/go-diff v1.0.0
github.com/theckman/yacspin v0.8.0
github.com/yuin/goldmark v1.3.5
golang.org/x/lint v0.0.0-20200302205851-738671d3881b // indirect
golang.org/x/net v0.0.0-20210331212208-0fccb6fa2b5c
golang.org/x/net v0.0.0-20210525063256-abc453219eb5
golang.org/x/tools v0.0.0-20201020161133-226fd2f889ca // indirect
gopkg.in/alecthomas/kingpin.v2 v2.2.6
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776
Expand Down
271 changes: 264 additions & 7 deletions go.sum

Large diffs are not rendered by default.

105 changes: 100 additions & 5 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@ import (
"os"
"os/signal"
"path/filepath"
"runtime"
"runtime/pprof"
"strings"
"syscall"
"time"

"github.com/bwplotka/mdox/pkg/clilog"
"github.com/bwplotka/mdox/pkg/extkingpin"
Expand All @@ -20,11 +23,16 @@ import (
"github.com/bwplotka/mdox/pkg/transform"
"github.com/bwplotka/mdox/pkg/version"
"github.com/charmbracelet/glamour"
"github.com/efficientgo/tools/core/pkg/errcapture"
"github.com/efficientgo/tools/core/pkg/logerrcapture"
extflag "github.com/efficientgo/tools/extkingpin"
"github.com/felixge/fgprof"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/oklog/run"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/expfmt"
"gopkg.in/alecthomas/kingpin.v2"
)

Expand All @@ -34,6 +42,11 @@ const (
logFormatCLILog = "clilog"
)

type mdoxMetrics struct {
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need extra struct?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was keeping the struct there since I was facing issues when using just a `reg` variable (metrics were not registering due to how the CLI operates). I will try out something else and see if I can remove it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've kept it for the Print() function now. 🙂

reg *prometheus.Registry
dir string
}

func setupLogger(logLevel, logFormat string) log.Logger {
var lvl level.Option
switch logLevel {
Expand Down Expand Up @@ -66,14 +79,33 @@ func main() {
Default("info").Enum("error", "warn", "info", "debug")
logFormat := app.Flag("log.format", "Log format to use.").
Default(logFormatCLILog).Enum(logFormatLogfmt, logFormatJson, logFormatCLILog)
// Profiling and metrics.
profilesPath := app.Flag("debug.profiles", "Path to which CPU and heap profiles are saved").Hidden().String()
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
profilesPath := app.Flag("debug.profiles", "Path to which CPU and heap profiles are saved").Hidden().String()
profilesPath := app.Flag("profiles.path", "Path to directory where CPU and heap profiles will be saved; If empty, no profiling will be enabled. ").ExistingDir()

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

metrics := app.Flag("metrics", "Path to which metrics are saved in OpenMetrics format").Hidden().String()
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto for above changes (:

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!


m := &mdoxMetrics{}

ctx, cancel := context.WithCancel(context.Background())
registerFmt(ctx, app)
registerFmt(ctx, app, m)
registerTransform(ctx, app)

cmd, runner := app.Parse()
logger := setupLogger(*logLevel, *logFormat)

if *metrics != "" {
m.dir = *metrics
m.reg = prometheus.NewRegistry()
}

if *profilesPath != "" {
finalize, err := snapshotProfiles(*profilesPath)
if err != nil {
level.Error(logger).Log("err", errors.Wrapf(err, "%s profiles init failed", cmd))
os.Exit(1)
}
defer logerrcapture.Do(logger, finalize, "profiles")
}

var g run.Group
g.Add(func() error {
// TODO(bwplotka): Move to customized better setup function.
Expand Down Expand Up @@ -101,6 +133,65 @@ func main() {
level.Error(logger).Log("err", errors.Wrapf(err, "%s command failed", cmd))
os.Exit(1)
}

}

// snapshotProfiles creates a timestamped sub-directory of dir, immediately
// writes a heap profile snapshot to "memprof.pb.gz" inside it, and starts
// fgprof profiling into "fgprof.pb.gz".
//
// The returned function stops fgprof (which writes the collected profile) and
// closes the fgprof file; call it once the profiled work is done. When an
// error is returned, every file opened here has already been closed and no
// profiler is left running.
func snapshotProfiles(dir string) (func() error, error) {
	// Build the per-run directory path once so both profiles are guaranteed to
	// land in the same place.
	profDir := filepath.Join(dir, strings.ReplaceAll(time.Now().UTC().Format(time.UnixDate), " ", "_"))
	if err := os.MkdirAll(profDir, os.ModePerm); err != nil {
		return nil, err
	}

	const flags = os.O_CREATE | os.O_TRUNC | os.O_WRONLY

	f, err := os.OpenFile(filepath.Join(profDir, "fgprof.pb.gz"), flags, os.ModePerm)
	if err != nil {
		return nil, err
	}

	if err := writeHeapProfile(filepath.Join(profDir, "memprof.pb.gz"), flags); err != nil {
		// Best-effort cleanup: nothing was written to f yet, so the heap
		// profile error is the one worth reporting.
		_ = f.Close()
		return nil, err
	}

	fgFunc := fgprof.Start(f, fgprof.FormatPprof)

	return func() (err error) {
		defer errcapture.Do(&err, f.Close, "close")
		return fgFunc()
	}, nil
}

// writeHeapProfile forces a garbage collection, writes the heap profile to
// path and closes the file, reporting the first error encountered.
func writeHeapProfile(path string, flags int) error {
	m, err := os.OpenFile(path, flags, os.ModePerm)
	if err != nil {
		return err
	}

	// Run a GC first so the snapshot reflects up-to-date allocation statistics.
	runtime.GC()

	if err := pprof.WriteHeapProfile(m); err != nil {
		_ = m.Close() // the write error takes precedence
		return err
	}
	// A failed Close can mean the profile was not fully flushed, so surface it.
	return m.Close()
}

func (m *mdoxMetrics) Print() error {
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we printing here? (::

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you need a struct; just Dump(r prometheus.Registry, dir string) error would be cleaner, WDYT?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

mfs, err := m.reg.Gather()
if err != nil {
return err
}
now := time.Now().UTC()
if err := os.MkdirAll(filepath.Join(m.dir, strings.ReplaceAll(now.Format(time.UnixDate), " ", "_")), os.ModePerm); err != nil {
return err
}
f, err := os.OpenFile(filepath.Join(m.dir, strings.ReplaceAll(now.Format(time.UnixDate), " ", "_"), "metrics"), os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm)
if err != nil {
return err
}
defer f.Close()

for _, mf := range mfs {
for _, metric := range mf.Metric {
unixTime := now.Unix()
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we cache now, so we have consistent timestamp across all series?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we do this already! `now` is defined when constructing the directory name and is reused for every metric, so the timestamp is the same across all series.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all good!

metric.TimestampMs = &unixTime
}
if _, err := expfmt.MetricFamilyToOpenMetrics(f, mf); err != nil {
return err
}
}
if _, err = expfmt.FinalizeOpenMetrics(f); err != nil {
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For future: Let's document how one can use those and import to Prometheus 🤗

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure!

return err
}
return nil
}

func interrupt(logger log.Logger, cancel <-chan struct{}) error {
Expand All @@ -115,7 +206,7 @@ func interrupt(logger log.Logger, cancel <-chan struct{}) error {
}
}

func registerFmt(_ context.Context, app *extkingpin.App) {
func registerFmt(_ context.Context, app *extkingpin.App, m *mdoxMetrics) {
cmd := app.Command("fmt", "Formats in-place given markdown files uniformly following GFM (Github Flavored Markdown: https://github.github.com/gfm/). Example: mdox fmt *.md")
files := cmd.Arg("files", "Markdown file(s) to process.").Required().ExistingFiles()
checkOnly := cmd.Flag("check", "If true, fmt will not modify the given files, instead it will fail if files needs formatting").Bool()
Expand All @@ -131,6 +222,10 @@ This directive runs executable with arguments and put its stderr and stdout outp
linksValidateConfig := extflag.RegisterPathOrContent(cmd, "links.validate.config", "YAML file for skipping link check, with spec defined in github.com/bwplotka/mdox/pkg/linktransformer.ValidatorConfig", extflag.WithEnvSubstitution())

cmd.Run(func(ctx context.Context, logger log.Logger) (err error) {
if m.reg != nil {
defer logerrcapture.Do(logger, m.Print, "print")
}

var opts []mdformatter.Option
if !*disableGenCodeBlocksDirectives {
opts = append(opts, mdformatter.WithCodeBlockTransformer(mdgen.NewCodeBlockTransformer()))
Expand All @@ -157,7 +252,7 @@ This directive runs executable with arguments and put its stderr and stdout outp
if err != nil {
return err
}
v, err := linktransformer.NewValidator(ctx, logger, validateConfigContent, anchorDir)
v, err := linktransformer.NewValidator(ctx, logger, validateConfigContent, anchorDir, m.reg)
if err != nil {
return err
}
Expand All @@ -172,7 +267,7 @@ This directive runs executable with arguments and put its stderr and stdout outp
}

if *checkOnly {
diff, err := mdformatter.IsFormatted(ctx, logger, *files, opts...)
diff, err := mdformatter.IsFormatted(ctx, logger, *files, m.reg, opts...)
if err != nil {
return err
}
Expand All @@ -193,7 +288,7 @@ This directive runs executable with arguments and put its stderr and stdout outp
return errors.Errorf("files not formatted: %v", diffOut)

}
return mdformatter.Format(ctx, logger, *files, opts...)
return mdformatter.Format(ctx, logger, *files, m.reg, opts...)
})
}

Expand Down
82 changes: 80 additions & 2 deletions pkg/mdformatter/linktransformer/link.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"context"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"regexp"
Expand All @@ -24,6 +25,8 @@ import (
"github.com/go-kit/kit/log/level"
"github.com/gocolly/colly/v2"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)

var remoteLinkPrefixRe = regexp.MustCompile(`^http[s]?://`)
Expand All @@ -35,6 +38,56 @@ var (
IDNotFoundErr = LookupError(errors.New("file exists, but does not have such id"))
)

// linktransformerMetrics bundles the Prometheus collectors kept by the link
// validator.
type linktransformerMetrics struct {
	// Incremented in visit: local for destinations without an http(s) prefix,
	// remote for everything else.
	localLinksChecked, remoteLinksChecked prometheus.Counter
	// Per-validator counters (round trip, GitHub, ignore), named after their
	// exported metric names.
	roundTripLinks, githubSkippedLinks, ignoreSkippedLinks prometheus.Counter

	// Collectors handed to the promhttp round-tripper instrumentation.
	collyRequests         *prometheus.CounterVec
	collyPerDomainLatency *prometheus.HistogramVec
}

// newLinktransformerMetrics creates every collector used by the link
// validator and, when reg is non-nil, registers them on it.
//
// With a nil reg the collectors are still created and returned, so callers can
// increment them unconditionally; they are just never exposed. Registration
// uses MustRegister, which panics on failure.
func newLinktransformerMetrics(reg *prometheus.Registry) *linktransformerMetrics {
	l := &linktransformerMetrics{
		localLinksChecked: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "mdox_local_links_total",
			Help: "The total number of local links which were checked",
		}),
		remoteLinksChecked: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "mdox_remote_links_total",
			Help: "The total number of remote links which were checked",
		}),
		roundTripLinks: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "mdox_round_trip_links_total",
			Help: "The total number of links which were roundtrip checked",
		}),
		githubSkippedLinks: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "mdox_github_skipped_links_total",
			Help: "The total number of links which were github checked",
		}),
		ignoreSkippedLinks: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "mdox_ignore_skipped_links_total",
			Help: "The total number of links which were ignore checked",
		}),

		// The colly collectors previously had no Help text and were exported
		// undocumented; describe them like the counters above.
		collyRequests: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Name: "mdox_colly_requests_total",
				Help: "The total number of HTTP requests sent through the instrumented transport",
			},
			[]string{},
		),
		collyPerDomainLatency: prometheus.NewHistogramVec(
			prometheus.HistogramOpts{
				Name:    "mdox_colly_per_domain_latency",
				Help:    "The latency in seconds of HTTP requests sent through the instrumented transport, per domain",
				Buckets: prometheus.DefBuckets,
			},
			[]string{"domain"},
		),
	}

	if reg == nil {
		return l
	}
	reg.MustRegister(
		l.localLinksChecked,
		l.remoteLinksChecked,
		l.roundTripLinks,
		l.githubSkippedLinks,
		l.ignoreSkippedLinks,
		l.collyRequests,
		l.collyPerDomainLatency,
	)
	return l
}

const (
originalURLKey = "originalURLKey"
numberOfRetriesKey = "retryKey"
Expand Down Expand Up @@ -126,6 +179,9 @@ type validator struct {

futureMu sync.Mutex
destFutures map[futureKey]*futureResult

l *linktransformerMetrics
transportFn func(url string) http.RoundTripper
}

type futureKey struct {
Expand All @@ -140,7 +196,7 @@ type futureResult struct {

// NewValidator returns mdformatter.LinkTransformer that crawls all links.
// TODO(bwplotka): Add optimization and debug modes - this is the main source of latency and pain.
func NewValidator(ctx context.Context, logger log.Logger, linksValidateConfig []byte, anchorDir string) (mdformatter.LinkTransformer, error) {
func NewValidator(ctx context.Context, logger log.Logger, linksValidateConfig []byte, anchorDir string, reg *prometheus.Registry) (mdformatter.LinkTransformer, error) {
var err error
config := Config{}
if string(linksValidateConfig) != "" {
Expand All @@ -149,6 +205,7 @@ func NewValidator(ctx context.Context, logger log.Logger, linksValidateConfig []
return nil, err
}
}

v := &validator{
logger: logger,
anchorDir: anchorDir,
Expand All @@ -157,7 +214,24 @@ func NewValidator(ctx context.Context, logger log.Logger, linksValidateConfig []
remoteLinks: map[string]error{},
c: colly.NewCollector(colly.Async(), colly.StdlibContext(ctx)),
destFutures: map[futureKey]*futureResult{},
l: &linktransformerMetrics{},
transportFn: func(url string) http.RoundTripper {
return http.DefaultTransport
},
}

v.l = newLinktransformerMetrics(reg)
v.transportFn = func(u string) http.RoundTripper {
parsed, err := url.Parse(u)
if err != nil {
panic(err)
}
return promhttp.InstrumentRoundTripperCounter(
v.l.collyRequests,
promhttp.InstrumentRoundTripperDuration(v.l.collyPerDomainLatency.MustCurryWith(prometheus.Labels{"domain": parsed.Host}), http.DefaultTransport),
)
}

// Set very soft limits.
// E.g github has 50-5000 https://docs.github.com/en/free-pro-team@latest/rest/reference/rate-limit limit depending
// on api (only search is below 100).
Expand Down Expand Up @@ -226,7 +300,7 @@ func NewValidator(ctx context.Context, logger log.Logger, linksValidateConfig []

// MustNewValidator returns mdformatter.LinkTransformer that crawls all links.
func MustNewValidator(logger log.Logger, linksValidateConfig []byte, anchorDir string) mdformatter.LinkTransformer {
v, err := NewValidator(context.TODO(), logger, linksValidateConfig, anchorDir)
v, err := NewValidator(context.TODO(), logger, linksValidateConfig, anchorDir, nil)
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -286,6 +360,8 @@ func (v *validator) visit(filepath string, dest string, lineNumbers string) {
v.destFutures[k] = &futureResult{cases: 1, resultFn: func() error { return nil }}
matches := remoteLinkPrefixRe.FindAllStringIndex(dest, 1)
if matches == nil {
v.l.localLinksChecked.Inc()

// Relative or absolute path. Check if exists.
newDest := absLocalLink(v.anchorDir, filepath, dest)

Expand All @@ -295,6 +371,8 @@ func (v *validator) visit(filepath string, dest string, lineNumbers string) {
}
return
}
v.l.remoteLinksChecked.Inc()

validator := v.validateConfig.GetValidatorForURL(dest)
if validator != nil {
matched, err := validator.IsValid(k, v)
Expand Down
Loading