diff --git a/README.md b/README.md index 88eb8e1..058739f 100644 --- a/README.md +++ b/README.md @@ -326,6 +326,6 @@ The targeted terraform folder is provided as the only argument. By default, it u | `unit.carbon` | | `g` | Carbon emission in `g` (gram) or `kg` | `out.format` | `-f ` `--format=` | `text` | `text` or `json` | `out.file` | `-o ` `--output=`| | file to write report to. Default is standard output. -| `data.path` | `` | | path of terraform files to analyse +| `data.path` | `` | | path of carbonifer data files (coefficients...). Default uses embedded files | `avg_cpu_use` | | `0.5` | planned [average percentage of CPU used](doc/methodology.md#cpu) | `log` | | `warn` | level of logs `info`, `debug`, `warn`, `error` diff --git a/doc/methodology.md b/doc/methodology.md index 48b0fcc..9d7a719 100644 --- a/doc/methodology.md +++ b/doc/methodology.md @@ -23,13 +23,13 @@ Average Watts = Number of vCPU * (Min Watts + Avg vCPU Utilization * (Max Watts - `Average Watts` result in Watt Hour - `Number of vCPU` : depends on the machine type chosen - - [GCP machine types](../data/gcp_instances.json) + - [GCP machine types](../internal/data/data/gcp_instances.json) - AWS - Azure - `Min Watt` and `Max Watts` depend on CPU architecture - - If processor architecture is unknown, we use averages computed by [Carbon Footprint Calculator](https://www.cloudcarbonfootprint.org/docs/methodology/#appendix-i-energy-coefficients): [energy coefficients](../data/energy_coefficients.json) + - If processor architecture is unknown, we use averages computed by [Carbon Footprint Calculator](https://www.cloudcarbonfootprint.org/docs/methodology/#appendix-i-energy-coefficients): [energy coefficients](../internal/data/data/energy_coefficients.json) - If we do know them, we use a more detailed list: - - [GCP Watt per CPU type](../data/gcp_watt_cpu.csv) + - [GCP Watt per CPU type](../internal/data/data/gcp_watt_cpu.csv) - `Avg vCPU Utilization` because we do this estimation at "plan" time, 
there is no way to pick a relevant value. However, to be able to plan and compare different CPUs or regions we need to set this constant. This is read from (by descending priority order) - user's config file in `$HOME/.carbonifer/config.yml`), variable `avg_cpu_use` - targeted folder config file in `$TERRAFORM_PROJECT/.carbonifer/config.yml`), variable `avg_cpu_use` @@ -45,7 +45,7 @@ Watt hours = Memory usage (GB) x Memory Energy Coefficient ### Disk Storage -We are using the same `Storage Energy Coefficient` as [Carbon Footprint Calculator](https://www.cloudcarbonfootprint.org/docs/methodology/#storage) in [energy coefficients file](../data/energy_coefficients.json). This coefficient is different for SSD and HDD, so disk type is important. +We are using the same `Storage Energy Coefficient` as [Carbon Footprint Calculator](https://www.cloudcarbonfootprint.org/docs/methodology/#storage) in [energy coefficients file](../internal/data/data/energy_coefficients.json). This coefficient is different for SSD and HDD, so disk type is important. ```text Watt hours = Disk Size (TB) x Storage Energy Coefficient x Replication Factor @@ -68,7 +68,7 @@ Unless set by the user in terraform file, the default size can be hard to find: ### GPU -Similarily to [CPU](#cpu), GPU energy consumption is calculated from the GPU type from min/max Watt described in [Carbon Footprint Calculator](https://www.cloudcarbonfootprint.org/docs/methodology/#graphic-processing-units-gpus), we use min/max watt from constant file [GPU Watt per GPU Type](../data/gpu_watt.csv) and apply same formula as [CPU](#cpu). +Similarly to [CPU](#cpu), GPU energy consumption is calculated from the GPU type from min/max Watt described in [Carbon Footprint Calculator](https://www.cloudcarbonfootprint.org/docs/methodology/#graphic-processing-units-gpus). We use min/max watt from constant file [GPU Watt per GPU Type](../internal/data/data/gpu_watt.csv) and apply the same formula as [CPU](#cpu). 
Average GPU Utilization is also read from: diff --git a/internal/data/data.go b/internal/data/data.go new file mode 100644 index 0000000..3de11c4 --- /dev/null +++ b/internal/data/data.go @@ -0,0 +1,46 @@ +package data + +import ( + "embed" + "io/fs" + "io/ioutil" + "os" + "path/filepath" + + log "github.com/sirupsen/logrus" + + "github.com/spf13/viper" +) + +//go:embed data/* +var data embed.FS + +func ReadDataFile(filename string) []byte { + dataPath := viper.GetString("data.path") + if dataPath != "" { + // If the environment variable is set, read from the specified file + filePath := filepath.Join(dataPath, filename) + if _, err := os.Stat(filePath); !os.IsNotExist(err) { + log.Debugf(" reading datafile '%v' from: %v", filename, filePath) + data, err := ioutil.ReadFile(filePath) + if err != nil { + log.Fatal(err) + } + return data + } else { + return readEmbeddedFile(filename) + } + } else { + // Otherwise, read from the embedded file + return readEmbeddedFile(filename) + } +} + +func readEmbeddedFile(filename string) []byte { + log.Debugf(" reading datafile '%v' embedded", filename) + data, err := fs.ReadFile(data, "data/"+filename) + if err != nil { + log.Fatal(err) + } + return data +} diff --git a/data/aws_co2_region.csv b/internal/data/data/aws_co2_region.csv similarity index 100% rename from data/aws_co2_region.csv rename to internal/data/data/aws_co2_region.csv diff --git a/data/aws_instances.json b/internal/data/data/aws_instances.json similarity index 100% rename from data/aws_instances.json rename to internal/data/data/aws_instances.json diff --git a/data/energy_coefficients.json b/internal/data/data/energy_coefficients.json similarity index 100% rename from data/energy_coefficients.json rename to internal/data/data/energy_coefficients.json diff --git a/data/gcp_co2_region.csv b/internal/data/data/gcp_co2_region.csv similarity index 100% rename from data/gcp_co2_region.csv rename to internal/data/data/gcp_co2_region.csv diff --git 
a/data/gcp_instances.json b/internal/data/data/gcp_instances.json similarity index 100% rename from data/gcp_instances.json rename to internal/data/data/gcp_instances.json diff --git a/data/gcp_sql_tiers.json b/internal/data/data/gcp_sql_tiers.json similarity index 100% rename from data/gcp_sql_tiers.json rename to internal/data/data/gcp_sql_tiers.json diff --git a/data/gcp_watt_cpu.csv b/internal/data/data/gcp_watt_cpu.csv similarity index 100% rename from data/gcp_watt_cpu.csv rename to internal/data/data/gcp_watt_cpu.csv diff --git a/data/gpu_watt.csv b/internal/data/data/gpu_watt.csv similarity index 100% rename from data/gpu_watt.csv rename to internal/data/data/gpu_watt.csv diff --git a/internal/estimate/coefficients/EmissionsPerRegion.go b/internal/estimate/coefficients/EmissionsPerRegion.go index 1949f11..5960a0a 100644 --- a/internal/estimate/coefficients/EmissionsPerRegion.go +++ b/internal/estimate/coefficients/EmissionsPerRegion.go @@ -3,13 +3,13 @@ package coefficients import ( "errors" "fmt" - "path/filepath" + "strings" + "github.com/carboniferio/carbonifer/internal/data" "github.com/carboniferio/carbonifer/internal/providers" "github.com/shopspring/decimal" log "github.com/sirupsen/logrus" - "github.com/spf13/viper" "github.com/yunabe/easycsv" ) @@ -54,9 +54,9 @@ type Emissions struct { func loadEmissionsPerRegion(dataFile string) map[string]Emissions { // Read the CSV records var records []EmissionsCSV - regionEmissionFile := filepath.Join(viper.GetString("data.path"), dataFile) + regionEmissionFile := data.ReadDataFile(dataFile) log.Debugf("reading GCP region/grid emissions from: %v", regionEmissionFile) - if err := easycsv.NewReaderFile(regionEmissionFile).ReadAll(&records); err != nil { + if err := easycsv.NewReader(strings.NewReader(string(regionEmissionFile))).ReadAll(&records); err != nil { log.Fatal(err) } diff --git a/internal/estimate/coefficients/coefficients.go b/internal/estimate/coefficients/coefficients.go index a045421..780ff60 
100644 --- a/internal/estimate/coefficients/coefficients.go +++ b/internal/estimate/coefficients/coefficients.go @@ -2,15 +2,12 @@ package coefficients import ( "encoding/json" - "io" - "os" - "path/filepath" "reflect" + "github.com/carboniferio/carbonifer/internal/data" "github.com/carboniferio/carbonifer/internal/providers" "github.com/shopspring/decimal" log "github.com/sirupsen/logrus" - "github.com/spf13/viper" ) type Coefficients struct { @@ -33,16 +30,8 @@ var coefficientsPerProviders *CoefficientsProviders func GetEnergyCoefficients() *CoefficientsProviders { if coefficientsPerProviders == nil { - energyCoefFile := filepath.Join(viper.GetString("data.path"), "energy_coefficients.json") - log.Debugf("reading Energy Coefficient Data file from: %v", energyCoefFile) - jsonFile, err := os.Open(energyCoefFile) - if err != nil { - log.Fatal(err) - } - defer jsonFile.Close() - - byteValue, _ := io.ReadAll(jsonFile) - err = json.Unmarshal([]byte(byteValue), &coefficientsPerProviders) + energyCoefFile := data.ReadDataFile("energy_coefficients.json") + err := json.Unmarshal(energyCoefFile, &coefficientsPerProviders) if err != nil { log.Fatal(err) } diff --git a/internal/providers/GPUWatt.go b/internal/providers/GPUWatt.go index 4449e62..dd9d0b7 100644 --- a/internal/providers/GPUWatt.go +++ b/internal/providers/GPUWatt.go @@ -1,12 +1,11 @@ package providers import ( - "path/filepath" "strings" + "github.com/carboniferio/carbonifer/internal/data" "github.com/shopspring/decimal" log "github.com/sirupsen/logrus" - "github.com/spf13/viper" "github.com/yunabe/easycsv" ) @@ -30,9 +29,9 @@ func GetGPUWatt(gpuName string) GPUWatt { if wattPerGPU == nil { // Read the CSV records var records []gpuWattCSV - gpuPowerDataFile := filepath.Join(viper.GetString("data.path"), "gpu_watt.csv") + gpuPowerDataFile := data.ReadDataFile("gpu_watt.csv") log.Debugf(" reading gpu power data from: %v", gpuPowerDataFile) - if err := easycsv.NewReaderFile(gpuPowerDataFile).ReadAll(&records); err 
!= nil { + if err := easycsv.NewReader(strings.NewReader(string(gpuPowerDataFile))).ReadAll(&records); err != nil { log.Fatal(err) } diff --git a/internal/providers/aws/AWS.go b/internal/providers/aws/AWS.go index acec94a..3ef40c9 100644 --- a/internal/providers/aws/AWS.go +++ b/internal/providers/aws/AWS.go @@ -2,12 +2,9 @@ package aws import ( "encoding/json" - "io" - "os" - "path/filepath" + "github.com/carboniferio/carbonifer/internal/data" log "github.com/sirupsen/logrus" - "github.com/spf13/viper" ) type MachineType struct { @@ -21,16 +18,8 @@ var awsInstanceTypes map[string]MachineType func GetAWSInstanceType(instanceTypeStr string) MachineType { log.Debugf(" Getting info for AWS machine type: %v", instanceTypeStr) if awsInstanceTypes == nil { - instancesDataFile := filepath.Join(viper.GetString("data.path"), "aws_instances.json") - log.Debugf(" reading aws instances data from: %v", instancesDataFile) - jsonFile, err := os.Open(instancesDataFile) - if err != nil { - log.Fatal(err) - } - defer jsonFile.Close() - - byteValue, _ := io.ReadAll(jsonFile) - err = json.Unmarshal([]byte(byteValue), &awsInstanceTypes) + byteValue := data.ReadDataFile("aws_instances.json") + err := json.Unmarshal([]byte(byteValue), &awsInstanceTypes) if err != nil { log.Fatal(err) } diff --git a/internal/providers/gcp/GCP.go b/internal/providers/gcp/GCP.go index 7f467f8..27f35f1 100644 --- a/internal/providers/gcp/GCP.go +++ b/internal/providers/gcp/GCP.go @@ -2,16 +2,13 @@ package gcp import ( "encoding/json" - "io" - "os" - "path/filepath" "regexp" "strconv" "strings" + "github.com/carboniferio/carbonifer/internal/data" "github.com/shopspring/decimal" log "github.com/sirupsen/logrus" - "github.com/spf13/viper" "github.com/yunabe/easycsv" ) @@ -67,16 +64,8 @@ func GetGCPMachineType(machineTypeStr string, zone string) MachineType { } } if gcpInstanceTypes == nil { - gcpInstancesDataFile := filepath.Join(viper.GetString("data.path"), "gcp_instances.json") - log.Debugf(" reading gcp 
instances data from: %v", gcpInstancesDataFile) - jsonFile, err := os.Open(gcpInstancesDataFile) - if err != nil { - log.Fatal(err) - } - defer jsonFile.Close() - - byteValue, _ := io.ReadAll(jsonFile) - err = json.Unmarshal([]byte(byteValue), &gcpInstanceTypes) + byteValue := data.ReadDataFile("gcp_instances.json") + err := json.Unmarshal([]byte(byteValue), &gcpInstanceTypes) if err != nil { log.Fatal(err) } @@ -99,9 +88,8 @@ func GetCPUWatt(cpu string) CPUWatt { if gcpWattPerCPU == nil { // Read the CSV records var records []cpuWattCSV - gcpPowerDataFile := filepath.Join(viper.GetString("data.path"), "gcp_watt_cpu.csv") - log.Debugf(" reading GCP cpu power data from: %v", gcpPowerDataFile) - if err := easycsv.NewReaderFile(gcpPowerDataFile).ReadAll(&records); err != nil { + fileContents := data.ReadDataFile("gcp_watt_cpu.csv") + if err := easycsv.NewReader(strings.NewReader(string(fileContents))).ReadAll(&records); err != nil { log.Fatal(err) } @@ -146,16 +134,8 @@ func GetGCPSQLTier(tierName string) SqlTier { } } if gcpSQLTiers == nil { - gcpSQLTierDataFile := filepath.Join(viper.GetString("data.path"), "gcp_sql_tiers.json") - log.Debugf(" reading gcp sql tier data from: %v", gcpSQLTierDataFile) - jsonFile, err := os.Open(gcpSQLTierDataFile) - if err != nil { - log.Fatal(err) - } - defer jsonFile.Close() - - byteValue, _ := io.ReadAll(jsonFile) - err = json.Unmarshal([]byte(byteValue), &gcpSQLTiers) + byteValue := data.ReadDataFile("gcp_sql_tiers.json") + err := json.Unmarshal([]byte(byteValue), &gcpSQLTiers) if err != nil { log.Fatal(err) } diff --git a/internal/terraform/gcp/resources_test.go b/internal/terraform/gcp/resources_test.go index 2ff2a4c..c7ff992 100644 --- a/internal/terraform/gcp/resources_test.go +++ b/internal/terraform/gcp/resources_test.go @@ -184,7 +184,7 @@ func TestGetResource(t *testing.T) { }, Specs: &resources.ComputeResourceSpecs{ GpuTypes: []string{ - "nvidia-tesla-a100", + "testing-custom-data-file", }, VCPUs: int32(12), MemoryMb: 
int32(87040), diff --git a/internal/terraform/terraform_test.go b/internal/terraform/terraform_test.go index e9ff069..9442287 100644 --- a/internal/terraform/terraform_test.go +++ b/internal/terraform/terraform_test.go @@ -139,9 +139,9 @@ func TestGetResources(t *testing.T) { MemoryMb: 87040, VCPUs: 12, GpuTypes: []string{ - "nvidia-tesla-a100", // Default of a2-highgpu-1g" - "nvidia-tesla-k80", // Added by user in main.tf - "nvidia-tesla-k80", // Added by user in main.tf + "testing-custom-data-file", // Default of a2-highgpu-1g" + "nvidia-tesla-k80", // Added by user in main.tf + "nvidia-tesla-k80", // Added by user in main.tf }, ReplicationFactor: 1, }, diff --git a/internal/testutils/testutils.go b/internal/testutils/testutils.go index 38ba3b4..b735350 100644 --- a/internal/testutils/testutils.go +++ b/internal/testutils/testutils.go @@ -12,6 +12,7 @@ import ( var RootDir string func init() { + _, filename, _, _ := runtime.Caller(0) RootDir = path.Join(path.Dir(filename), "../..") err := os.Chdir(RootDir) diff --git a/internal/tools/aws/instances/README.md b/internal/tools/aws/instances/README.md new file mode 100644 index 0000000..eb71a5c --- /dev/null +++ b/internal/tools/aws/instances/README.md @@ -0,0 +1,11 @@ +# Generate AWS Instances + +Tool to generate internal/data/data/aws_instances.json + +Requirement: + +- go installed (1.17) + +```bash +go run internal/tools/gcp/instances/generate.go > data/gcp_instances.json +``` diff --git a/internal/utils/config.go b/internal/utils/config.go index f553819..544fcfa 100644 --- a/internal/utils/config.go +++ b/internal/utils/config.go @@ -111,22 +111,21 @@ func initLogger() { func checkDataConfig() { dataPath := viper.GetString("data.path") - if dataPath == "" { - log.Fatalf("Data directory is not set (\"data.path\")") - } - path, err := filepath.Abs(dataPath) - if err != nil { - log.Fatal(err) - } - f, err := os.Open(dataPath) - if err != nil { - log.Fatalf("Cannot read data directory \"%v\": %v", path, err) - } - defer f.Close() 
+ if dataPath != "" { + path, err := filepath.Abs(dataPath) + if err != nil { + log.Fatal(err) + } + f, err := os.Open(dataPath) + if err != nil { + log.Fatalf("Cannot read data directory \"%v\": %v", path, err) + } + defer f.Close() - _, err = f.Readdirnames(1) - if err == io.EOF { - log.Fatalf("Empty data directory \"%v\": %v", path, err) + _, err = f.Readdirnames(1) + if err == io.EOF { + log.Fatalf("Empty data directory \"%v\": %v", path, err) + } } } diff --git a/internal/utils/defaults.yaml b/internal/utils/defaults.yaml index 0066a9e..89225be 100644 --- a/internal/utils/defaults.yaml +++ b/internal/utils/defaults.yaml @@ -3,8 +3,6 @@ out: format: text file: -data: - path: "./data" unit: time: h power: W diff --git a/test/data/gcp_instances.json b/test/data/gcp_instances.json index b9dc839..de851f6 100644 --- a/test/data/gcp_instances.json +++ b/test/data/gcp_instances.json @@ -35,7 +35,7 @@ "name": "a2-highgpu-1g", "vcpus": 12, "gpus": [ - "nvidia-tesla-a100" + "testing-custom-data-file" ], "memoryMb": 87040, "cpuTypes": [