From da9399b7a94f3d40f16e42488123dfa97031f6b9 Mon Sep 17 00:00:00 2001 From: Blake Hatch Date: Tue, 4 Jun 2024 23:27:56 -0400 Subject: [PATCH] Add JSON config examples to store.rs (#967) --- nativelink-config/examples/redis.json | 123 +++++++++++++ nativelink-config/src/stores.rs | 254 ++++++++++++++++++++++++++ 2 files changed, 377 insertions(+) create mode 100644 nativelink-config/examples/redis.json diff --git a/nativelink-config/examples/redis.json b/nativelink-config/examples/redis.json new file mode 100644 index 000000000..5cb273659 --- /dev/null +++ b/nativelink-config/examples/redis.json @@ -0,0 +1,123 @@ +{ + "stores": { + "CAS_FAST_SLOW_STORE": { + "fast_slow": { + "fast": { + "redis_store": { + "addresses": [ + "redis://127.0.0.1:6379/", // Master node 1 + "redis://127.0.0.1:6380/", // Master node 2 + "redis://127.0.0.1:6381/", // Master node 3 + "redis://127.0.0.1:6382/", // Node 4 + "redis://127.0.0.1:6383/", // Node 5 + "redis://127.0.0.1:6384/" // Node 6 + //"redis://172.18.0.2:6379/" // Master node 3 + // "redis://172.18.0.3:6379/", // Master node 3 + // "redis://172.18.0.4:6379/" // Master node 3 + ] + } + }, + "slow": { + "filesystem": { + "content_path": "/tmp/nativelink/data/content_path-index", + "temp_path": "/tmp/nativelink/data/tmp_path-index", + "eviction_policy": { + "max_bytes": 120000000000 + } + } + } + } + }, + "AC_FAST_SLOW_STORE": { + "fast_slow": { + "fast": { + "filesystem": { + "content_path": "/tmp/nativelink/data/content_path-index", + "temp_path": "/tmp/nativelink/data/tmp_path-index", + "eviction_policy": { + "max_bytes": 120000000000 + } + } + }, + "slow": { + "filesystem": { + "content_path": "/tmp/nativelink/data/content_path-ac", + "temp_path": "/tmp/nativelink/data/tmp_path-ac", + "eviction_policy": { + "max_bytes": 5000000000 + } + } + } + } + }, + "AC_MAIN_STORE": { + "completeness_checking": { + "backend": { + "ref_store": { + "name": "AC_FAST_SLOW_STORE" + } + }, + "cas_store": { + "ref_store": { 
+ "name": "CAS_MAIN_STORE" + } + } + } + }, + "CAS_MAIN_STORE": { + "existence_cache": { + "backend": { + "compression": { + "compression_algorithm": { + "lz4": {} + }, + "backend": { + "ref_store": { + "name": "CAS_FAST_SLOW_STORE" + } + } + } + } + } + } + }, + "servers": [ + { + "listener": { + "http": { + "socket_address": "0.0.0.0:50051" + } + }, + "services": { + "cas": { + "main": { + "cas_store": "CAS_MAIN_STORE" + } + }, + "ac": { + "main": { + "ac_store": "AC_MAIN_STORE" + } + }, + "capabilities": {}, + "bytestream": { + "cas_stores": { + "main": "CAS_MAIN_STORE" + } + } + } + }, + { + "listener": { + "http": { + "socket_address": "0.0.0.0:50061" + } + }, + "services": { + "experimental_prometheus": { + "path": "/metrics" + } + } + } + ] + } diff --git a/nativelink-config/src/stores.rs b/nativelink-config/src/stores.rs index 3614efc58..db145255e 100644 --- a/nativelink-config/src/stores.rs +++ b/nativelink-config/src/stores.rs @@ -40,6 +40,18 @@ pub enum ConfigDigestHashFunction { #[derive(Serialize, Deserialize, Debug, Clone)] pub enum StoreConfig { /// Memory store will store all data in a hashmap in memory. + /// + /// **Example JSON Config:** + /// ```json + /// "memory": { + /// "eviction_policy": { + /// // 10mb. + /// "max_bytes": 10000000, + /// } + /// } + /// + /// ``` + /// memory(MemoryStore), /// S3 store will use Amazon's S3 service as a backend to store @@ -48,6 +60,22 @@ pub enum StoreConfig { /// /// This configuration will never delete files, so you are /// responsible for purging old files in other ways. 
+ /// + /// **Example JSON Config:** + /// ```json + /// "experimental_s3_store": { + /// "region": "eu-north-1", + /// "bucket": "crossplane-bucket-af79aeca9", + /// "key_prefix": "test-prefix-index/", + /// "retry": { + /// "max_retries": 6, + /// "delay": 0.3, + /// "jitter": 0.5 + /// }, + /// "multipart_max_concurrent_uploads": 10 + /// } + /// ``` + /// experimental_s3_store(S3Store), /// Verify store is used to apply verifications to an underlying @@ -58,12 +86,48 @@ pub enum StoreConfig { /// /// The suggested configuration is to have the CAS validate the /// hash and size and the AC validate nothing. + /// + /// **Example JSON Config:** + /// ```json + /// "verify": { + /// "backend": { + /// "memory": { + /// "eviction_policy": { "max_bytes": 500000000 } // 500mb. + /// } + /// }, + /// "verify_size": true, + /// "hash_verification_function": "sha256" + /// } + /// ``` + /// verify(Box), /// Completeness checking store verifies if the /// output files & folders exist in the CAS before forwarding /// the request to the underlying store. /// Note: This store should only be used on AC stores. + /// + /// **Example JSON Config:** + /// ```json + /// "completeness_checking": { + /// "backend": { + /// "filesystem": { + /// "content_path": "~/.cache/nativelink/content_path-ac", + /// "temp_path": "~/.cache/nativelink/tmp_path-ac", + /// "eviction_policy": { + /// // 500mb. + /// "max_bytes": 500000000, + /// } + /// } + /// }, + /// "cas_store": { + /// "ref_store": { + /// "name": "CAS_MAIN_STORE" + /// } + /// } + /// } + /// ``` + /// completeness_checking(Box), /// A compression store that will compress the data inbound and @@ -72,6 +136,26 @@ pub enum StoreConfig { /// a store that requires network transport and/or storage space /// is a concern it is often faster and more efficient to use this /// store before those stores. 
+ /// + /// **Example JSON Config:** + /// ```json + /// "compression": { + /// "compression_algorithm": { + /// "lz4": {} + /// }, + /// "backend": { + /// "filesystem": { + /// "content_path": "/tmp/nativelink/data/content_path-cas", + /// "temp_path": "/tmp/nativelink/data/tmp_path-cas", + /// "eviction_policy": { + /// // 2gb. + /// "max_bytes": 2000000000, + /// } + /// } + /// } + /// } + /// ``` + /// compression(Box), /// A dedup store will take the inputs and run a rolling hash @@ -98,6 +182,45 @@ pub enum StoreConfig { /// Note: When running `.has()` on this store, it will only check /// to see if the entry exists in the `index_store` and not check /// if the individual chunks exist in the `content_store`. + /// + /// **Example JSON Config:** + /// ```json + /// "dedup": { + /// "index_store": { + /// "memory": { + /// // 1gb. + /// "eviction_policy": { "max_bytes": 1000000000 } + /// } + /// }, + /// "content_store": { + /// "compression": { + /// "compression_algorithm": { + /// "lz4": {} + /// }, + /// "backend": { + /// "fast_slow": { + /// "fast": { + /// "memory": { + /// // 500mb. + /// "eviction_policy": { "max_bytes": 500000000 } + /// } + /// }, + /// "slow": { + /// "filesystem": { + /// "content_path": "/tmp/nativelink/data/content_path-content", + /// "temp_path": "/tmp/nativelink/data/tmp_path-content", + /// "eviction_policy": { + /// "max_bytes": 2000000000 // 2gb. + /// } + /// } + /// } + /// } + /// } + /// } + /// } + /// } + /// ``` + /// dedup(Box), /// Existence store will wrap around another store and cache calls @@ -105,6 +228,26 @@ pub enum StoreConfig { /// faster. This is useful for cases when you have a store that /// is slow to respond to has calls. /// Note: This store should only be used on CAS stores. + /// + /// **Example JSON Config:** + /// ```json + /// "existence_cache": { + /// "backend": { + /// "memory": { + /// "eviction_policy": { + /// // 500mb. 
+ /// "max_bytes": 500000000, + /// } + /// } + /// }, + /// "cas_store": { + /// "ref_store": { + /// "name": "CAS_MAIN_STORE" + /// } + /// } + /// } + /// ``` + /// existence_cache(Box), /// FastSlow store will first try to fetch the data from the `fast` @@ -121,12 +264,55 @@ pub enum StoreConfig { /// `slow` store if it exists in the `fast` store (ie: it assumes /// that if an object exists `fast` store it will exist in `slow` /// store). + /// + /// **Example JSON Config:** + /// ```json + /// "fast_slow": { + /// "fast": { + /// "filesystem": { + /// "content_path": "/tmp/nativelink/data/content_path-index", + /// "temp_path": "/tmp/nativelink/data/tmp_path-index", + /// "eviction_policy": { + /// // 500mb. + /// "max_bytes": 500000000, + /// } + /// } + /// }, + /// "slow": { + /// "filesystem": { + /// "content_path": "/tmp/nativelink/data/content_path-index", + /// "temp_path": "/tmp/nativelink/data/tmp_path-index", + /// "eviction_policy": { + /// // 500mb. + /// "max_bytes": 500000000, + /// } + /// } + /// } + /// } + /// ``` + /// fast_slow(Box), /// Shards the data to multiple stores. This is useful for cases /// when you want to distribute the load across multiple stores. /// The digest hash is used to determine which store to send the /// data to. + /// + /// **Example JSON Config:** + /// ```json + /// "shard": { + /// "stores": [ + /// { + /// "store": { + /// // 10mb. + /// "memory": { "eviction_policy": { "max_bytes": 10000000 } } + /// }, + /// "weight": 1 + /// } + /// ] + /// } + /// ``` + /// shard(ShardStore), /// Stores the data on the filesystem. This store is designed for @@ -135,6 +321,19 @@ pub enum StoreConfig { /// as long as the filesystem integrity holds. This store uses the /// filesystem's `atime` (access time) to hold the last touched time /// of the file(s). 
+ /// + /// **Example JSON Config:** + /// ```json + /// "filesystem": { + /// "content_path": "/tmp/nativelink/data-worker-test/content_path-cas", + /// "temp_path": "/tmp/nativelink/data-worker-test/tmp_path-cas", + /// "eviction_policy": { + /// // 10gb. + /// "max_bytes": 10000000000, + /// } + /// } + /// ``` + /// filesystem(FilesystemStore), /// Store used to reference a store in the root store manager. @@ -142,6 +341,14 @@ pub enum StoreConfig { /// nested stores. Example, you may want to share the same memory store /// used for the action cache, but use a FastSlowStore and have the fast /// store also share the memory store for efficiency. + /// + /// **Example JSON Config:** + /// ```json + /// "ref_store": { + /// "name": "FS_CONTENT_STORE" + /// } + /// ``` + /// ref_store(RefStore), /// Uses the size field of the digest to separate which store to send the @@ -151,6 +358,25 @@ pub enum StoreConfig { /// words, don't use on AC (Action Cache) stores. Any store where you can /// safely use VerifyStore.verify_size = true, this store should be safe /// to use (ie: CAS stores). + /// + /// **Example JSON Config:** + /// ```json + /// "size_partitioning": { + /// "size": 134217728, // 128mib. + /// "lower_store": { + /// "memory": { + /// "eviction_policy": { + /// "max_bytes": "${NATIVELINK_CAS_MEMORY_CONTENT_LIMIT:-100000000}" + /// } + /// } + /// }, + /// "upper_store": { + /// // This store discards data larger than 128mib. + /// "noop": {} + /// } + /// } + /// ``` + /// size_partitioning(Box), /// This store will pass-through calls to another GRPC store. This store @@ -162,6 +388,18 @@ pub enum StoreConfig { /// when this store is serving the a CAS store, not an AC store. If using /// this store directly without being a child of any store there are no /// side effects and is the most efficient way to use it. 
+ /// + /// **Example JSON Config:** + /// ```json + /// "grpc": { + /// "instance_name": "main", + /// "endpoints": [ + /// {"address": "grpc://${CAS_ENDPOINT:-127.0.0.1}:50051"} + /// ], + /// "store_type": "ac" + /// } + /// ``` + /// grpc(GrpcStore), /// Stores data in any stores compatible with Redis APIs. @@ -169,12 +407,28 @@ pub enum StoreConfig { /// Pairs well with SizePartitioning and/or FastSlow stores. /// Ideal for accepting small object sizes as most redis store /// services have a max file upload of between 256Mb-512Mb. + /// + /// **Example JSON Config:** + /// ```json + /// "redis_store": { + /// "addresses": [ + /// "redis://127.0.0.1:6379/", + /// ] + /// } + /// ``` + /// redis_store(RedisStore), /// Noop store is a store that sends streams into the void and all data /// retrieval will return 404 (NotFound). This can be useful for cases /// where you may need to partition your data and part of your data needs /// to be discarded. + /// + /// **Example JSON Config:** + /// ```json + /// "noop": {} + /// ``` + /// noop, }