Skip to content

Commit

Permalink
Add/fix provenance collectors and workloads (#102)
Browse files Browse the repository at this point in the history
  • Loading branch information
charmoniumQ committed Feb 18, 2025
1 parent 1cf22ff commit b3402e3
Show file tree
Hide file tree
Showing 50 changed files with 2,350 additions and 3,712 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ jobs:

- run: nix flake check --all-systems --print-build-logs

- run: nix flake check ./benchmark --all-systems --print-build-logs

- run: nix build --print-build-logs .#probe-bundled

# The devshell uses slightly different build process than the Nix pkg
Expand Down
8 changes: 8 additions & 0 deletions benchmark/REPRODUCING.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,11 @@ For new provenance collectors:

Note that the attribute `nix_packages`, in both cases, contains a list of strings that reference packages defined in the package outputs of the `flake.nix` for the current architecture.
Using Nix to build our software environment ensures that all architectures and POSIX platforms can reproducibly build the relevant software environments.

## Commands used

```
./runner.py --workloads small-calib --collectors fast --iterations 1 --warmups 30
./runner.py --workloads big-calib --collectors noprov --iterations 1 --warmups 5 --append
./runner.py --workloads fast --collectors fast --iterations 3 --warmups 3 --append
```
39 changes: 0 additions & 39 deletions benchmark/bpftrace_preexec.py

This file was deleted.

30 changes: 0 additions & 30 deletions benchmark/bpftrace_workload_preexec.py

This file was deleted.

101 changes: 52 additions & 49 deletions benchmark/command.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,12 @@
import shlex
import datetime
import measure_resources
import hashlib
import pathlib
import typing
import dataclasses
import subprocess
import util
import json
from mandala.imports import op
import mandala.model


@dataclasses.dataclass(frozen=True)
class Placeholder:
value: str
postfix: str = ""
prefix: str = ""

def expand(self, context: typing.Mapping[str, str]) -> str:
return self.prefix + context[self.value] + self.postfix
from mandala.imports import op # type: ignore
import mandala.model # type: ignore


def nix_build(attr: str) -> str:
Expand All @@ -28,49 +16,48 @@ def nix_build(attr: str) -> str:
# If the flake is changed, the mandala cache is invalid
# Therefore, make the Nix flake and lock an argument tracked by Mandala.
path = pathlib.Path(attr.partition("#")[0])
ret = _nix_build(
ret = _cached_nix_build(
attr,
(path / "flake.nix").read_text(),
json.loads((path / "flake.lock").read_text()),
)
else:
ret = _nix_build(attr, "", None)
return mandala.model.Context.current_context.storage.unwrap(ret)
ret = _cached_nix_build(attr, "", None)
ctx = mandala.model.Context.current_context
if ctx is None:
return ret
else:
return ctx.storage.unwrap(ret)


@op
def _nix_build(attr: str, flake_src: str, flake_lock: typing.Any) -> str:
def _cached_nix_build(attr: str, flake_src: str, flake_lock: typing.Any) -> str:
print(f"Nix building {attr}")
start = datetime.datetime.now()
cmd = ["nix", "build", attr, "--print-out-paths", "--no-link"]
proc = subprocess.run(
cmd,
capture_output=True,
text=True,
)
if proc.returncode != 0:
print(shlex.join(cmd))
print(proc.stdout)
print(proc.stderr)
raise RuntimeError("Nix build failed")
ret = proc.stdout.strip()
print(f"Nix built {attr} in {(datetime.datetime.now() - start).total_seconds():.1f}")
proc = measure_resources.measure_resources(cmd)
print(f"Done in {proc.walltime.total_seconds():.1f}sec")
proc.raise_for_error()
ret = proc.stdout.decode().strip()
return ret


@dataclasses.dataclass(frozen=True)
class NixPath:
package: str
postfix: str = ""
prefix: str = ""
class Variable:
name: str


@dataclasses.dataclass(frozen=True)
class NixAttr:
attr: str

def expand(self) -> str:
return self.prefix + nix_build(self.package) + self.postfix
return nix_build(self.attr)

def __hash__(self) -> int:
return int.from_bytes(hashlib.md5(self.expand().encode()).digest())

def __eq__(self, other: typing.Any) -> bool:
if isinstance(other, NixPath):
if isinstance(other, NixAttr):
return self.expand() == other.expand()
else:
return False
Expand All @@ -82,18 +69,34 @@ def __getstate__(self) -> typing.Mapping[str, str]:


@dataclasses.dataclass(frozen=True)
class Command:
args: tuple[str | NixPath | Placeholder | pathlib.Path, ...]

def expand(self, context: typing.Mapping[str, str]) -> tuple[str, ...]:
return tuple(
arg if isinstance(arg, str) else
arg.expand() if isinstance(arg, NixPath) else
str(arg) if isinstance(arg, pathlib.Path) else
arg.expand(context) if isinstance(arg, Placeholder) else
util.raise_(TypeError(f"{type(arg)!s}: {arg!r}"))
for arg in self.args
class Combo:
parts: typing.Sequence[str | Variable | NixAttr | pathlib.Path]

def expand(self, context: typing.Mapping[str, str]) -> str:
return "".join(
part if isinstance(part, str) else
context[part.name] if isinstance(part, Variable) else
part.expand() if isinstance(part, NixAttr) else
str(part) if isinstance(part, pathlib.Path) else
util.raise_(TypeError(f"{type(part)!s}: {part!r}"))
for part in self.parts
)


@dataclasses.dataclass(frozen=True)
class Command:
    """An argv-style command whose arguments are resolved lazily at run time.

    Each argument may be a literal string, a ``Variable`` (looked up in the
    expansion context), a ``NixAttr`` (built with Nix when expanded), or a
    ``Combo`` (a concatenation of the other kinds).
    """

    # NOTE(review): the annotation omits pathlib.Path, yet expand() has a
    # branch for it — confirm whether Path belongs in this union.
    args: typing.Sequence[str | NixAttr | Variable | Combo]

    def expand(self, context: typing.Mapping[str, str]) -> list[str]:
        """Resolve every argument to a concrete string using *context*.

        Raises TypeError (via util.raise_) for argument types outside the
        supported union.
        """
        return [
            part if isinstance(part, str) else
            context[part.name] if isinstance(part, Variable) else
            part.expand() if isinstance(part, NixAttr) else
            str(part) if isinstance(part, pathlib.Path) else
            part.expand(context) if isinstance(part, Combo) else
            util.raise_(TypeError(f"{type(part)!s}: {part!r}"))
            for part in self.args
        ]

    def __bool__(self) -> bool:
        # A command is truthy iff it has at least one argument.
        return bool(self.args)
57 changes: 0 additions & 57 deletions benchmark/compare.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,4 @@ def make_var_density_blobs(n_samples=750, centers=[[0,0]], cluster_std=[0.5], ra
horizontalalignment='right')
plot_num += 1

plt.show()
plt.savefig("test.png")
13 changes: 13 additions & 0 deletions benchmark/data-science/plot_simple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Flat demo script: fit a per-dataset linear regression over Anscombe's
# quartet and save the resulting 2x2 lattice of plots to "out.png"
# (non-interactive: the figure is written to disk, never shown).
import seaborn as sns
sns.set_theme(style="ticks")

# Load the example dataset for Anscombe's quartet
# NOTE(review): load_dataset fetches the CSV from seaborn's online data
# repository unless it is already cached — confirm offline availability.
df = sns.load_dataset("anscombe")

# Show the results of a linear regression within each dataset
plot = sns.lmplot(
    data=df, x="x", y="y", col="dataset", hue="dataset",
    col_wrap=2, palette="muted", ci=None,
    height=4, scatter_kws={"s": 50, "alpha": 1}
)
plot.fig.savefig("out.png")
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@
iris = datasets.load_iris()
digits = datasets.load_digits(n_class=10)
wine = datasets.load_wine()
# swissroll, swissroll_labels = datasets.make_swiss_roll(
# n_samples=1000, noise=0.1, random_state=42
# )
swissroll, swissroll_labels = datasets.make_swiss_roll(
n_samples=1000, noise=0.1, random_state=42
)
# sphere = np.random.normal(size=(600, 3))
# sphere = preprocessing.normalize(sphere)
# sphere_hsv = np.array(
Expand All @@ -81,7 +81,9 @@

reducers = [
(manifold.TSNE, {"perplexity": 50}),
# Original authors commented out this line:
# (manifold.LocallyLinearEmbedding, {'n_neighbors':10, 'method':'hessian'}),
# It seems to cause a crash due to singular input matrix
(manifold.Isomap, {"n_neighbors": 30}),
(manifold.MDS, {}),
(decomposition.PCA, {}),
Expand All @@ -93,12 +95,12 @@
(iris.data, iris.target),
(digits.data, digits.target),
(wine.data, wine.target),
# (swissroll, swissroll_labels),
(swissroll, swissroll_labels),
# (sphere, sphere_colors),
]
dataset_names = [
"Blobs", "Iris", "Digits", "Wine",
# "Swiss Roll",
"Swiss Roll",
# "Sphere"
]

Expand Down Expand Up @@ -142,3 +144,4 @@

plt.tight_layout()
# plt.show()
plt.savefig("test.png")
Loading

0 comments on commit b3402e3

Please sign in to comment.