From 9ad63e19e8c19f22d48b97a0fac680a851e41da0 Mon Sep 17 00:00:00 2001 From: Lee ByeongJun Date: Fri, 29 Mar 2024 12:07:54 +0900 Subject: [PATCH] feat(stdlibs): Fuzz for String (#1809) ## Description I have implemented basic Fuzz functions in the testing package and a function to apply fuzzing to string data. To transform string data, I used genetic algorithms. Additionally, I implemented a Uniform distribution to generate random values and then used the central limit theorem to approximate a normal distribution, thereby enabling the creation of random booleans in the random file. In this PR, only the string type is handled. I plan to support other types in the future. --- gnovm/stdlibs/testing/fuzz.gno | 292 ++++++++++++++++++++++++++ gnovm/stdlibs/testing/fuzz_test.gno | 172 +++++++++++++++ gnovm/stdlibs/testing/random.gno | 75 +++++++ gnovm/stdlibs/testing/random_test.gno | 99 +++++++++ 4 files changed, 638 insertions(+) create mode 100644 gnovm/stdlibs/testing/fuzz.gno create mode 100644 gnovm/stdlibs/testing/fuzz_test.gno create mode 100644 gnovm/stdlibs/testing/random.gno create mode 100644 gnovm/stdlibs/testing/random_test.gno diff --git a/gnovm/stdlibs/testing/fuzz.gno b/gnovm/stdlibs/testing/fuzz.gno new file mode 100644 index 00000000000..62a36800b2b --- /dev/null +++ b/gnovm/stdlibs/testing/fuzz.gno @@ -0,0 +1,292 @@ +package testing + +import ( + "math" + "strings" + "time" +) + +type Fuzzer interface { + InsertDeleteMutate(p float64) Fuzzer + Mutate() Fuzzer + String() string +} + +type StringFuzzer struct { + Value string + f *F +} + +func NewStringFuzzer(value string) *StringFuzzer { + return &StringFuzzer{Value: value} +} + +// Mutate changes a StringFuzzer's value by replacing a random character +// with a random ASCII character. 
+func (sf *StringFuzzer) Mutate() Fuzzer { + runes := []rune(sf.Value) + if len(runes) == 0 { + return sf + } + + index := randRange(0, len(runes)-1) + runes[index] = randomASCIIChar() + + return NewStringFuzzer(string(runes)) +} + +func (sf *StringFuzzer) InsertDeleteMutate(p float64) Fuzzer { + value := InsertDelete(sf.Value, p) + return NewStringFuzzer(value) +} + +func (sf *StringFuzzer) Fuzz() string { + if GenerateRandomBool(0.2) { + return InsertDelete(sf.Value, 0.1) + } + + rs := []rune(sf.Value) + lrs := len(rs) + + if lrs == 0 { + return sf.Value + } + + index := randRange(0, lrs-1) + rs[index] = randomASCIIChar() + + return string(rs) +} + +func (sf *StringFuzzer) String() string { + return sf.Value +} + +func randomASCIIChar() rune { + r := int(randRange(32, 126)) + + return rune(r) +} + +// Individual represents a single individual in the population. +type Individual struct { + Fuzzer Fuzzer + Fitness int +} + +func NewIndividual(fuzzer Fuzzer) *Individual { + return &Individual{Fuzzer: fuzzer} +} + +func (ind *Individual) calculateFitness() { + ind.Fitness = len(ind.Fuzzer.String()) +} + +// Selection selects individuals from the population based on their fitness. +// +// Use roulette wheel selection to select individuals from the population. 
+// ref: https://en.wikipedia.org/wiki/Fitness_proportionate_selection +func Selection(population []*Individual) []*Individual { + totalFitness := calculateTotalFitness(population) + selected := make([]*Individual, len(population)) + + for i := range selected { + selected[i] = selectIndividual(population, totalFitness) + } + + return selected +} + +func calculateTotalFitness(population []*Individual) int { + totalFitness := 0 + + for _, ind := range population { + totalFitness += ind.Fitness + } + + return totalFitness +} + +func selectIndividual(population []*Individual, totalFitness int) *Individual { + pick := randRange(0, totalFitness-1) + sum := 0 + + for _, ind := range population { + sum += ind.Fitness + if uint64(sum) > uint64(pick) { + return ind + } + } + + return nil +} + +// Crossover takes two parents and creates two children by combining their genetic material. +// +// The pivot point is chosen randomly from the length of the shortest parent. after the pivot point selected, +// the genetic material of the two parents is swapped to create the two children. +func Crossover(parent1, parent2 *Individual) (*Individual, *Individual) { + p1Runes := []rune(parent1.Fuzzer.String()) + p2Runes := []rune(parent2.Fuzzer.String()) + + p1Len := len(p1Runes) + p2Len := len(p2Runes) + + point := 0 + if p1Len >= p2Len { + point = int(randRange(0, p2Len-1)) + } else { + point = int(randRange(0, p1Len-1)) + } + + child1 := append(append([]rune{}, p1Runes[:point]...), p2Runes[point:]...) + child2 := append(append([]rune{}, p2Runes[:point]...), p1Runes[point:]...) + + updatedIdv1 := NewIndividual(NewStringFuzzer(string(child1))) + updatedIdv2 := NewIndividual(NewStringFuzzer(string(child2))) + + return updatedIdv1, updatedIdv2 +} + +func (ind *Individual) Mutate() { + ind.Fuzzer = ind.Fuzzer.Mutate() +} + +// InsertDelete randomly inserts or deletes a character from a string. 
+func InsertDelete(s string, p float64) string { + rr := []rune(s) + l := len(rr) + + // Insert + if GenerateRandomBool(p) { + pos := randRange(0, l-1) + rr = append(rr, 0) + + copy(rr[pos+1:], rr[pos:]) + + char := randomASCIIChar() + rr[pos] = char + } else { + if l == 0 { + return s + } + + pos := randRange(0, l-1) + rr = append(rr[:pos], rr[pos+1:]...) + } + + return string(rr) +} + +type F struct { + corpus []string + failed bool // Indicates whether the fuzzing has encountered a failure. + msgs []string // Stores log messages for reporting. + iters int // Number of iterations to run the fuzzing process. TODO: CLI flag to set this. +} + +// Runner is a type for the target function to fuzz. +type Runner func(*T, ...interface{}) + +// Fuzz applies the fuzzing process to the target function. +func (f *F) Fuzz(run Runner, iter int) { + f.evolve(iter) + + for _, input := range f.corpus { + args := make([]interface{}, len(f.corpus)) + for i := range args { + args[i] = input + } + + run(nil, args...) + } +} + +// Add adds test values to initialize the corpus. 
+func (f *F) Add(values ...interface{}) []Fuzzer { + fuzzers := make([]Fuzzer, len(values)) + + for i, v := range values { + str, ok := v.(string) + if !ok { + continue + } + f.corpus = append(f.corpus, str) + fuzzers[i] = &StringFuzzer{Value: str} + } + + return fuzzers +} + +func (f *F) evolve(generations int) { + population := make([]*Individual, len(f.corpus)) + for i, c := range f.corpus { + population[i] = &Individual{Fuzzer: &StringFuzzer{Value: c, f: f}} + } + + for _, ind := range population { + ind.calculateFitness() + } + + for gen := 0; gen < generations; gen++ { + population = Selection(population) + newPopulation := make([]*Individual, 0, len(population)) + + for i := 0; i < len(population); i += 2 { + if i+1 < len(population) { + child1, child2 := Crossover(population[i], population[i+1]) + newPopulation = append(newPopulation, child1, child2) + continue + } + + newPopulation = append(newPopulation, population[i]) + } + + var ( + bestFitness int + bestIndividual string + ) + + for _, ind := range newPopulation { + if GenerateRandomBool(0.2) { + ind.Mutate() + } + + if GenerateRandomBool(0.1) { + ind.Fuzzer = ind.Fuzzer.InsertDeleteMutate(0.3) + } + + ind.calculateFitness() + + if ind.Fitness > bestFitness { + bestFitness = ind.Fitness + bestIndividual = ind.Fuzzer.String() + } + } + + population = newPopulation + } + + f.corpus = make([]string, len(population)) + for i, ind := range population { + f.corpus[i] = ind.Fuzzer.String() + } +} + +// Fail marks the function as having failed bur continue execution. +func (f *F) Fail() { + f.failed = true +} + +// Fatal is equivalent to Log followed by FailNow. +// It logs the message and marks the fuzzing as failed. 
+func (f *F) Fatal(args ...interface{}) { + var sb strings.Builder + + for _, arg := range args { + sb.WriteString(arg.(string)) + } + + f.msgs = append(f.msgs, sb.String()) + f.Fail() +} diff --git a/gnovm/stdlibs/testing/fuzz_test.gno b/gnovm/stdlibs/testing/fuzz_test.gno new file mode 100644 index 00000000000..454c1cccd6e --- /dev/null +++ b/gnovm/stdlibs/testing/fuzz_test.gno @@ -0,0 +1,172 @@ +package testing + +import ( + "encoding/binary" + "strings" + "time" +) + +func TestMutate(t *T) { + originalValue := "Hello" + fuzzer := StringFuzzer{Value: originalValue} + + newFuzzer := fuzzer.Mutate().(*StringFuzzer) + + if newFuzzer.Value == originalValue { + t.Errorf("Mutate did not change the string: got %v, want different from %v", newFuzzer.Value, originalValue) + } + + if len(newFuzzer.Value) != len(originalValue) { + t.Errorf("Mutated string has different length: got %s (len=%v), want %s (len=%v)", newFuzzer.Value, len(newFuzzer.Value), originalValue, len(originalValue)) + } +} + +func TestSelection(t *T) { + tests := []struct { + name string + population []*Individual + }{ + { + name: "Empty population", + population: []*Individual{}, + }, + { + name: "Uniform fitness", + population: []*Individual{ + {Fitness: 10}, + {Fitness: 10}, + {Fitness: 10}, + }, + }, + { + name: "Different fitness", + population: []*Individual{ + {Fitness: 5}, + {Fitness: 15}, + {Fitness: 10}, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *T) { + selected := Selection(tc.population) + if len(selected) != len(tc.population) { + t.Errorf("Expected selected length to be %d, got %d", len(tc.population), len(selected)) + } + }) + } +} + +func TestCrossover(t *T) { + parent1 := NewIndividual(&StringFuzzer{Value: "foobar"}) + parent2 := NewIndividual(&StringFuzzer{Value: "bazbiz"}) + + var child1, child2 *Individual + for i := 0; i < 100; i++ { + child1, child2 = Crossover(parent1, parent2) + } + + if child1.Fuzzer.String() == "foobar" || child2.Fuzzer.String() == 
"bazbiz" { + t.Errorf("Crossover did not modify children correctly, got %s and %s", child1.Fuzzer.String(), child2.Fuzzer.String()) + } +} + +func Test_StringManipulation(t *T) { + f := &F{ + corpus: []string{"hello", "world", "foo", "bar"}, + } + + f.evolve(30) + + if len(f.corpus) != 4 { + t.Fatalf("corpus length is %d, want 4", len(f.corpus)) + } + + for i, c := range f.corpus { + if c == "" { + t.Fatalf("corpus[%d] is empty", i) + } + + if len(c) < 3 { + t.Fatalf("corpus[%d] is too short: %s", i, c) + } + + if f.corpus[0] == "hello" { + t.Fatalf("corpus[0] is still the same: %s", f.corpus[0]) + } + + if f.corpus[1] == "world" { + t.Fatalf("corpus[1] is still the same: %s", f.corpus[1]) + } + + if f.corpus[2] == "foo" { + t.Fatalf("corpus[2] is still the same: %s", f.corpus[2]) + } + + if f.corpus[3] == "bar" { + t.Fatalf("corpus[3] is still the same: %s", f.corpus[3]) + } + + } +} + +func TestFuzz(t *T) { + f := F{} + f.Add("hello", "world", "foo") + f.Fuzz(func(t *T, inputs ...interface{}) { + for _, input := range inputs { + strInput, ok := input.(string) + if !ok { + t.Errorf("Type mismatch, expected a string but got %T", input) + continue + } + + words := strings.Fields(strInput) + if len(words) == 0 { + t.Errorf("Expected non-empty input") + } + } + }, 15) + + if len(f.corpus) == 0 { + t.Fatalf("Fuzzing corpus is empty after testing") + } + + if len(f.corpus) > 3 { + t.Fatalf("Fuzzing corpus has more than 3 elements: %v", f.corpus) + } + + for _, c := range f.corpus { + if c == "hello" || c == "world" || c == "foo" { + t.Fatalf("Fuzzing corpus still contains the original elements: %v", f.corpus) + } + } +} + +func TestF_Fail(t *T) { + f := F{} + f.Fail() + + if !f.failed { + t.Errorf("Fail did not set the failed flag.") + } +} + +func TestF_Fatal(t *T) { + f := F{} + testMessage := "test failure message" + f.Fatal(testMessage) + + if !f.failed { + t.Errorf("Fatal did not set the failed flag.") + } + + if len(f.msgs) != 1 { + t.Fatalf("Fatal did not set the 
message correctly: got %v, want %v", f.msgs, testMessage) + } + + if !strings.Contains(f.msgs[0], testMessage) { + t.Errorf("Fatal did not set the message correctly: got %v, want %v", f.msgs[0], testMessage) + } +} diff --git a/gnovm/stdlibs/testing/random.gno b/gnovm/stdlibs/testing/random.gno new file mode 100644 index 00000000000..b7ee6ca93ef --- /dev/null +++ b/gnovm/stdlibs/testing/random.gno @@ -0,0 +1,75 @@ +package testing + +import ( + "math" + "time" +) + +// Internal state for the random number generator. +var x uint64 = 42 + +// UniformRand generates a uniformly distributed random number. +// It uses the linear congruential generator method to produce the random number, +// and the result is in the range from 0 to m-1. Here, m is 32768. +// To produce random number in [0, m-1], repeat this method as many times as needed. +// [1] https://en.wikipedia.org/wiki/Linear_congruential_generator +func UniformRand() uint64 { + var a uint64 = 950213 + var c uint64 = 12345 + var m uint64 = 32768 + x = x*a + c + return (x >> 16) % m +} + +// _srand function sets the seed for the random number generator. +// This function provides an initial starting point for the sequence of random numbers. +func _srand(seed int64) { + x = uint64(seed) +} + +// nrand function generates a number approximating a normal distribution[1]. +// It uses the Central Limit Theorem[2] by summing multiple uniformly distributed random numbers +// to approximate a normal distribution. +// +// y = (Sum(k=1, K) x_k - K/2) / sqrt(K/12) +// +// Here, K is some integer and x_k are uniformly distributed numbers, +// even for K as small as 10, the approximation is quite good.
+// [1] https://en.wikipedia.org/wiki/Normal_distribution +// [2] https://en.wikipedia.org/wiki/Central_limit_theorem +func nrand() float64 { + var i, K uint64 = 0, 10 + var m uint64 = 32768 + var y float64 = 0 + + for i = 0; i < K; i++ { + y += float64(UniformRand()) / float64(m) + } + y = (y - float64(K)/2) / math.Sqrt(float64(K)/12) + return y +} + +// randRange generates a random integer between min and max (inclusive). +// This function leverages the UniformRand function to generate a random number in a specified range. +// Note: max should be greater than min. +func randRange(min, max int) uint64 { + _min := uint64(min) + _max := uint64(max) + if _min >= _max { + return _min + } + + rangeSize := _max - _min + 1 + // adjust UniformRand to fit into our range. + return _min + (UniformRand() % rangeSize) +} + +func GenerateRandomBool(bias float64) bool { + // Modify to use fuzz's random function for generating boolean with bias + if bias < 0 || bias > 1 { + panic("bias should be in the range [0, 1]") + } + // Convert fuzz's normalized range random float [-1, 1] to [0, 1] + res := (nrand() + 1) / 2 + return res > bias +} diff --git a/gnovm/stdlibs/testing/random_test.gno b/gnovm/stdlibs/testing/random_test.gno new file mode 100644 index 00000000000..8c1c741b2b8 --- /dev/null +++ b/gnovm/stdlibs/testing/random_test.gno @@ -0,0 +1,99 @@ +package testing + +import ( + "math" + "strconv" + "time" +) + +func updateSeed() { + seed := time.Now().UnixNano() + _srand(seed) +} + +func Test_UniformRand(t *T) { + valueMap := make(map[uint64]int) + maxIter := 1000 + + for i := 0; i < maxIter; i++ { + result := UniformRand() + + if result < 0 || result >= 32768 { + t.Errorf("rand() = %v, want in range [0, 32767]", result) + } + + if _, ok := valueMap[result]; ok { + valueMap[result]++ + } else { + valueMap[result] = 1 + } + } + + lvm := len(valueMap) + if lvm > maxIter || lvm == 0 { + t.Errorf("len(valueMap) = %v, want <= %v", lvm, maxIter) + } +} + +func Test_nrand(t *T) { + 
sum := 0.0 + for i := 0; i < 1000; i++ { + result := nrand() + sum += result + } + avg := sum / float64(1000) + + // The value returned by nrand() should be close to 0 + // on average for large sample sizes. + // The expectation of the standard deviation should be + // close to 1 for large sample sizes. + if math.Abs(avg) > 0.1 { // can be adjusted based on sample size + t.Errorf("nrand() = %v, want in range [-0.1, 0.1]", avg) + } +} + +func Test_GenerateRandomBool(t *T) { + updateSeed() + + for _, bias := range []float64{0, 0.5, 1} { + trueCount, falseCount := 0, 0 + sampleSize := 1000 + + for i := 0; i < sampleSize; i++ { + result := GenerateRandomBool(bias) + if result { + trueCount++ + } else { + falseCount++ + } + } + + if trueCount == 0 || falseCount == 0 { + t.Errorf("Bias = %v, trueCount = %v, falseCount = %v, want both > 0", bias, trueCount, falseCount) + } + + if bias < 0 || bias > 1 { + t.Errorf("Bias = %v, want in range [0, 1]", bias) + } + } +} + +func TestRandRange(t *T) { + nums := make(map[uint64]int) + for i := 0; i < 1000; i++ { + res := randRange(0, 10) + if res < 0 || res > 10 { + t.Errorf("gerandRangenerateRange() = %v, want in range [0, 9]", res) + } + + if _, ok := nums[res]; ok { + nums[res]++ + } else { + nums[res] = 1 + } + } + + if len(nums) != 11 { + t.Errorf("len(nums) = %v, want in range [0, 10]", len(nums)) + } +}