diff --git a/Cargo.lock b/Cargo.lock index ae0fa0882d..ec65979338 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -369,7 +369,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -391,7 +391,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -402,7 +402,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -411,6 +411,12 @@ version = "0.15.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ae037714f313c1353189ead58ef9eec30a8e8dc101b2622d461418fd59e28a9" +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "atty" version = "0.2.14" @@ -488,8 +494,8 @@ dependencies = [ "bytes", "fastrand 1.9.0", "hex", - "http", - "hyper", + "http 0.2.12", + "hyper 0.14.30", "ring 0.16.20", "time", "tokio", @@ -521,7 +527,7 @@ dependencies = [ "aws-smithy-http", "aws-smithy-types", "aws-types", - "http", + "http 0.2.12", "regex", "tracing", ] @@ -537,8 +543,8 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.6", "lazy_static", "percent-encoding", "pin-project-lite", @@ -567,8 +573,8 @@ dependencies = [ "aws-smithy-xml", "aws-types", "bytes", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.6", "once_cell", "percent-encoding", "regex", @@ -596,7 +602,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "http", + "http 0.2.12", "regex", "tokio-stream", "tower", @@ -623,7 +629,7 @@ dependencies = [ "aws-smithy-xml", "aws-types", "bytes", - "http", + "http 0.2.12", "regex", "tower", "tracing", @@ -640,7 +646,7 @@ dependencies = [ "aws-smithy-eventstream", "aws-smithy-http", "aws-types", - "http", + "http 0.2.12", "tracing", ] @@ -656,7 +662,7 @@ dependencies = [ "form_urlencoded", "hex", "hmac", - "http", + "http 0.2.12", "once_cell", "percent-encoding", "regex", @@ -689,8 +695,8 @@ dependencies = [ "crc32c", "crc32fast", "hex", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.6", "md-5", "pin-project-lite", "sha1 0.10.6", @@ -710,9 +716,9 @@ dependencies = [ "aws-smithy-types", "bytes", "fastrand 1.9.0", - "http", - "http-body", - "hyper", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.30", "hyper-tls", "pin-project-lite", "tokio", @@ -742,9 +748,9 @@ dependencies = [ "bytes", "bytes-utils", "futures-core", - "http", - "http-body", - "hyper", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.30", "once_cell", "percent-encoding", "pin-project-lite", @@ -763,8 +769,8 @@ dependencies = [ "aws-smithy-http", "aws-smithy-types", "bytes", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.6", "pin-project-lite", "tower", "tracing", @@ -822,11 +828,58 @@ dependencies = [ "aws-smithy-client", "aws-smithy-http", "aws-smithy-types", - "http", + "http 0.2.12", "rustc_version", "tracing", ] +[[package]] +name = "axum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + 
"futures-util", + "http 1.1.0", + "http-body 1.0.1", + "http-body-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper 1.0.1", + "tower", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper 1.0.1", + "tower-layer", + "tower-service", +] + [[package]] name = "azure_core" version = "0.17.0" @@ -1102,7 +1155,7 @@ checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -1301,7 +1354,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -2033,7 +2086,7 @@ dependencies = [ "google-cloud-storage", "google-cloud-token", "home", - "hyper", + "hyper 0.14.30", "hyper-tls", "itertools 0.11.0", "lazy_static", @@ -2410,7 +2463,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -2421,7 +2474,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -2480,7 +2533,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -2490,7 +2543,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -2510,7 +2563,7 @@ checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", "unicode-xid", ] @@ -2841,7 +2894,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -3050,7 +3103,26 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.12", + "indexmap 2.5.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.1.0", "indexmap 2.5.0", "slab", "tokio", @@ -3194,6 +3266,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http-body" version = "0.4.6" @@ -3201,7 +3284,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", - "http", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.1.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -3256,9 +3362,9 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2", - "http", - "http-body", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", "httparse", "httpdate", "itoa", @@ -3270,6 +3376,40 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "h2 0.4.6", + "http 1.1.0", + "http-body 1.0.1", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper 1.5.0", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "hyper-tls" version = "0.5.0" @@ -3277,12 +3417,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper", + "hyper 0.14.30", "native-tls", "tokio", "tokio-native-tls", ] +[[package]] +name = "hyper-util" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", + "hyper 1.5.0", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", +] + [[package]] name = "hyperloglog" version = "0.3.0-dev0" @@ -3756,6 +3915,12 @@ dependencies = [ "regex-automata 0.1.10", ] +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + [[package]] name = "matrixmultiply" version = "0.3.9" @@ -4066,7 +4231,7 @@ dependencies = [ "base64 0.13.1", "chrono", "getrandom 0.2.15", - "http", + "http 0.2.12", "rand 0.8.5", "serde", "serde_json", @@ -4126,7 +4291,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -4350,7 +4515,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -4517,6 +4682,16 @@ dependencies = [ "prost-derive 0.11.9", ] +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive 0.13.3", +] + [[package]] name = "prost-derive" version = "0.9.0" @@ -4543,6 +4718,28 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "prost-derive" +version = "0.13.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools 0.11.0", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost 0.13.3", +] + [[package]] name = "pyo3" version = "0.21.2" @@ -4603,7 +4800,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -4616,7 +4813,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -4803,7 +5000,7 @@ checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -4873,10 +5070,10 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2", - "http", - "http-body", - "hyper", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.30", "hyper-tls", "ipnet", "js-sys", @@ -4891,7 +5088,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 0.1.2", "system-configuration", "tokio", "tokio-native-tls", @@ -4966,7 +5163,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.74", + "syn 2.0.87", "unicode-ident", ] @@ -5124,7 +5321,7 @@ checksum = "1db149f81d46d2deba7cd3c50772474707729550221e69588478ebf9ada425ae" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -5201,7 +5398,7 @@ checksum = "fabfb6138d2383ea8208cf98ccf69cdfb1aff4088460681d84189aa259762f97" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -5429,6 +5626,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spark-connect" +version = "0.3.0-dev0" +dependencies = [ + "prost 0.13.3", + "prost-types", + "tonic", +] + [[package]] name = "spin" version = "0.5.2" @@ -5549,7 +5755,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -5581,9 +5787,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.74" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", @@ -5596,6 +5802,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "sync_wrapper" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" + [[package]] name = "synstructure" version = "0.12.6" @@ -5719,7 +5931,7 @@ checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -5750,7 +5962,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -5903,7 +6115,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" 
dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -5918,9 +6130,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" dependencies = [ "futures-core", "pin-project-lite", @@ -5941,6 +6153,36 @@ dependencies = [ "tokio", ] +[[package]] +name = "tonic" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64 0.22.1", + "bytes", + "h2 0.4.6", + "http 1.1.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.5.0", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost 0.13.3", + "socket2", + "tokio", + "tokio-stream", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tower" version = "0.4.13" @@ -5949,9 +6191,13 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", + "indexmap 1.9.3", "pin-project", "pin-project-lite", + "rand 0.8.5", + "slab", "tokio", + "tokio-util", "tower-layer", "tower-service", "tracing", @@ -5989,7 +6235,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -6102,7 +6348,7 @@ checksum = "70b20a22c42c8f1cd23ce5e34f165d4d37038f5b663ad20fb6adbdf029172483" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] @@ -6345,7 +6591,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", "wasm-bindgen-shared", ] @@ -6379,7 +6625,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -6693,7 +6939,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.87", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 98d82ba872..5204ac2f81 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -154,7 +154,9 @@ members = [ "src/daft-table", "src/daft-writers", "src/hyperloglog", - "src/parquet2" + "src/parquet2", + # "src/spark-connect-script", + "src/generated/spark-connect" ] [workspace.dependencies] diff --git a/src/generated/spark-connect/Cargo.toml b/src/generated/spark-connect/Cargo.toml new file mode 100644 index 0000000000..6dd0a6c388 --- /dev/null +++ b/src/generated/spark-connect/Cargo.toml @@ -0,0 +1,12 @@ +[dependencies] +prost = "0.13.3" +prost-types = "0.13.3" +tonic = "0.12.3" + +[lints] +workspace = true + +[package] +edition = {workspace = true} +name = "spark-connect" +version = {workspace = true} diff --git a/src/generated/spark-connect/src/lib.rs b/src/generated/spark-connect/src/lib.rs new file mode 100644 index 0000000000..8a4313010b --- /dev/null +++ b/src/generated/spark-connect/src/lib.rs @@ -0,0 +1,5769 @@ +#![allow(clippy::derive_partial_eq_without_eq)] + +// This file is @generated by prost-build. +/// StorageLevel for persisting Datasets/Tables. 
+#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct StorageLevel { + /// (Required) Whether the cache should use disk or not. + #[prost(bool, tag = "1")] + pub use_disk: bool, + /// (Required) Whether the cache should use memory or not. + #[prost(bool, tag = "2")] + pub use_memory: bool, + /// (Required) Whether the cache should use off-heap or not. + #[prost(bool, tag = "3")] + pub use_off_heap: bool, + /// (Required) Whether the cached data is deserialized or not. + #[prost(bool, tag = "4")] + pub deserialized: bool, + /// (Required) The number of replicas. + #[prost(int32, tag = "5")] + pub replication: i32, +} +/// ResourceInformation to hold information about a type of Resource. +/// The corresponding class is 'org.apache.spark.resource.ResourceInformation' +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ResourceInformation { + /// (Required) The name of the resource + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + /// (Required) An array of strings describing the addresses of the resource. + #[prost(string, repeated, tag = "2")] + pub addresses: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +/// An executor resource request. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorResourceRequest { + /// (Required) resource name. + #[prost(string, tag = "1")] + pub resource_name: ::prost::alloc::string::String, + /// (Required) resource amount requesting. + #[prost(int64, tag = "2")] + pub amount: i64, + /// Optional script used to discover the resources. + #[prost(string, optional, tag = "3")] + pub discovery_script: ::core::option::Option<::prost::alloc::string::String>, + /// Optional vendor, required for some cluster managers. + #[prost(string, optional, tag = "4")] + pub vendor: ::core::option::Option<::prost::alloc::string::String>, +} +/// A task resource request. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TaskResourceRequest { + /// (Required) resource name. + #[prost(string, tag = "1")] + pub resource_name: ::prost::alloc::string::String, + /// (Required) resource amount requesting as a double to support fractional + /// resource requests. + #[prost(double, tag = "2")] + pub amount: f64, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ResourceProfile { + /// (Optional) Resource requests for executors. Mapped from the resource name + /// (e.g., cores, memory, CPU) to its specific request. + #[prost(map = "string, message", tag = "1")] + pub executor_resources: + ::std::collections::HashMap<::prost::alloc::string::String, ExecutorResourceRequest>, + /// (Optional) Resource requests for tasks. Mapped from the resource name + /// (e.g., cores, memory, CPU) to its specific request. + #[prost(map = "string, message", tag = "2")] + pub task_resources: + ::std::collections::HashMap<::prost::alloc::string::String, TaskResourceRequest>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Origin { + /// (Required) Indicate the origin type. + #[prost(oneof = "origin::Function", tags = "1")] + pub function: ::core::option::Option, +} +/// Nested message and enum types in `Origin`. +pub mod origin { + /// (Required) Indicate the origin type. 
+ #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Function { + #[prost(message, tag = "1")] + PythonOrigin(super::PythonOrigin), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PythonOrigin { + /// (Required) Name of the origin, for example, the name of the function + #[prost(string, tag = "1")] + pub fragment: ::prost::alloc::string::String, + /// (Required) Callsite to show to end users, for example, stacktrace. + #[prost(string, tag = "2")] + pub call_site: ::prost::alloc::string::String, +} +/// This message describes the logical \[[DataType]\] of something. It does not carry the value +/// itself but only describes it. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DataType { + #[prost( + oneof = "data_type::Kind", + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 25, 23, 24" + )] + pub kind: ::core::option::Option, +} +/// Nested message and enum types in `DataType`. +pub mod data_type { + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Boolean { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Byte { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Short { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Integer { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Long { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Float { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Double { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct String { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + #[prost(string, tag = "2")] + pub collation: ::prost::alloc::string::String, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Binary { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Null { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Timestamp { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Date { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct TimestampNtz { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct CalendarInterval { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct YearMonthInterval { + #[prost(int32, optional, tag = "1")] + pub start_field: ::core::option::Option, + #[prost(int32, optional, tag = "2")] + pub end_field: ::core::option::Option, + #[prost(uint32, tag = "3")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct DayTimeInterval { + #[prost(int32, 
optional, tag = "1")] + pub start_field: ::core::option::Option, + #[prost(int32, optional, tag = "2")] + pub end_field: ::core::option::Option, + #[prost(uint32, tag = "3")] + pub type_variation_reference: u32, + } + /// Start compound types. + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Char { + #[prost(int32, tag = "1")] + pub length: i32, + #[prost(uint32, tag = "2")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct VarChar { + #[prost(int32, tag = "1")] + pub length: i32, + #[prost(uint32, tag = "2")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Decimal { + #[prost(int32, optional, tag = "1")] + pub scale: ::core::option::Option, + #[prost(int32, optional, tag = "2")] + pub precision: ::core::option::Option, + #[prost(uint32, tag = "3")] + pub type_variation_reference: u32, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct StructField { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + #[prost(message, optional, tag = "2")] + pub data_type: ::core::option::Option, + #[prost(bool, tag = "3")] + pub nullable: bool, + #[prost(string, optional, tag = "4")] + pub metadata: ::core::option::Option<::prost::alloc::string::String>, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Struct { + #[prost(message, repeated, tag = "1")] + pub fields: ::prost::alloc::vec::Vec, + #[prost(uint32, tag = "2")] + pub type_variation_reference: u32, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Array { + #[prost(message, optional, boxed, tag = "1")] + pub element_type: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag = "2")] + pub contains_null: bool, + #[prost(uint32, tag = "3")] + pub type_variation_reference: u32, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Map { + #[prost(message, optional, boxed, tag = "1")] + pub key_type: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, boxed, tag = "2")] + pub value_type: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(bool, tag = "3")] + pub value_contains_null: bool, + #[prost(uint32, tag = "4")] + pub type_variation_reference: u32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Variant { + #[prost(uint32, tag = "1")] + pub type_variation_reference: u32, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Udt { + #[prost(string, tag = "1")] + pub r#type: ::prost::alloc::string::String, + #[prost(string, optional, tag = "2")] + pub jvm_class: ::core::option::Option<::prost::alloc::string::String>, + #[prost(string, optional, tag = "3")] + pub python_class: ::core::option::Option<::prost::alloc::string::String>, + #[prost(string, optional, tag = "4")] + pub serialized_python_class: ::core::option::Option<::prost::alloc::string::String>, + #[prost(message, optional, boxed, tag = "5")] + pub sql_type: ::core::option::Option<::prost::alloc::boxed::Box>, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Unparsed { + /// (Required) The unparsed data type string + #[prost(string, tag = "1")] + pub data_type_string: ::prost::alloc::string::String, + } + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Kind { + #[prost(message, tag = "1")] + Null(Null), + #[prost(message, tag = "2")] + Binary(Binary), + #[prost(message, tag = "3")] + Boolean(Boolean), + /// Numeric types + #[prost(message, tag = "4")] + Byte(Byte), 
+ #[prost(message, tag = "5")] + Short(Short), + #[prost(message, tag = "6")] + Integer(Integer), + #[prost(message, tag = "7")] + Long(Long), + #[prost(message, tag = "8")] + Float(Float), + #[prost(message, tag = "9")] + Double(Double), + #[prost(message, tag = "10")] + Decimal(Decimal), + /// String types + #[prost(message, tag = "11")] + String(String), + #[prost(message, tag = "12")] + Char(Char), + #[prost(message, tag = "13")] + VarChar(VarChar), + /// Datatime types + #[prost(message, tag = "14")] + Date(Date), + #[prost(message, tag = "15")] + Timestamp(Timestamp), + #[prost(message, tag = "16")] + TimestampNtz(TimestampNtz), + /// Interval types + #[prost(message, tag = "17")] + CalendarInterval(CalendarInterval), + #[prost(message, tag = "18")] + YearMonthInterval(YearMonthInterval), + #[prost(message, tag = "19")] + DayTimeInterval(DayTimeInterval), + /// Complex types + #[prost(message, tag = "20")] + Array(::prost::alloc::boxed::Box), + #[prost(message, tag = "21")] + Struct(Struct), + #[prost(message, tag = "22")] + Map(::prost::alloc::boxed::Box), + #[prost(message, tag = "25")] + Variant(Variant), + /// UserDefinedType + #[prost(message, tag = "23")] + Udt(::prost::alloc::boxed::Box), + /// UnparsedDataType + #[prost(message, tag = "24")] + Unparsed(Unparsed), + } +} +/// Expression used to refer to fields, functions and similar. This can be used everywhere +/// expressions in SQL appear. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Expression { + #[prost(message, optional, tag = "18")] + pub common: ::core::option::Option, + #[prost( + oneof = "expression::ExprType", + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 999" + )] + pub expr_type: ::core::option::Option, +} +/// Nested message and enum types in `Expression`. +pub mod expression { + /// Expression for the OVER clause or WINDOW clause. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Window { + /// (Required) The window function. + #[prost(message, optional, boxed, tag = "1")] + pub window_function: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) The way that input rows are partitioned. + #[prost(message, repeated, tag = "2")] + pub partition_spec: ::prost::alloc::vec::Vec, + /// (Optional) Ordering of rows in a partition. + #[prost(message, repeated, tag = "3")] + pub order_spec: ::prost::alloc::vec::Vec, + /// (Optional) Window frame in a partition. + /// + /// If not set, it will be treated as 'UnspecifiedFrame'. + #[prost(message, optional, boxed, tag = "4")] + pub frame_spec: ::core::option::Option<::prost::alloc::boxed::Box>, + } + /// Nested message and enum types in `Window`. + pub mod window { + /// The window frame + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct WindowFrame { + /// (Required) The type of the frame. + #[prost(enumeration = "window_frame::FrameType", tag = "1")] + pub frame_type: i32, + /// (Required) The lower bound of the frame. + #[prost(message, optional, boxed, tag = "2")] + pub lower: + ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The upper bound of the frame. + #[prost(message, optional, boxed, tag = "3")] + pub upper: + ::core::option::Option<::prost::alloc::boxed::Box>, + } + /// Nested message and enum types in `WindowFrame`. 
+ pub mod window_frame { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct FrameBoundary { + #[prost(oneof = "frame_boundary::Boundary", tags = "1, 2, 3")] + pub boundary: ::core::option::Option, + } + /// Nested message and enum types in `FrameBoundary`. + pub mod frame_boundary { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Boundary { + /// CURRENT ROW boundary + #[prost(bool, tag = "1")] + CurrentRow(bool), + /// UNBOUNDED boundary. + /// For lower bound, it will be converted to 'UnboundedPreceding'. + /// for upper bound, it will be converted to 'UnboundedFollowing'. + #[prost(bool, tag = "2")] + Unbounded(bool), + /// This is an expression for future proofing. We are expecting literals on the server side. + #[prost(message, tag = "3")] + Value(::prost::alloc::boxed::Box), + } + } + #[derive( + Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, + )] + #[repr(i32)] + pub enum FrameType { + Undefined = 0, + /// RowFrame treats rows in a partition individually. + Row = 1, + /// RangeFrame treats rows in a partition as groups of peers. + /// All rows having the same 'ORDER BY' ordering are considered as peers. + Range = 2, + } + impl FrameType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Undefined => "FRAME_TYPE_UNDEFINED", + Self::Row => "FRAME_TYPE_ROW", + Self::Range => "FRAME_TYPE_RANGE", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "FRAME_TYPE_UNDEFINED" => Some(Self::Undefined), + "FRAME_TYPE_ROW" => Some(Self::Row), + "FRAME_TYPE_RANGE" => Some(Self::Range), + _ => None, + } + } + } + } + } + /// SortOrder is used to specify the data ordering, it is normally used in Sort and Window. + /// It is an unevaluable expression and cannot be evaluated, so can not be used in Projection. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct SortOrder { + /// (Required) The expression to be sorted. + #[prost(message, optional, boxed, tag = "1")] + pub child: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The sort direction, should be ASCENDING or DESCENDING. + #[prost(enumeration = "sort_order::SortDirection", tag = "2")] + pub direction: i32, + /// (Required) How to deal with NULLs, should be NULLS_FIRST or NULLS_LAST. + #[prost(enumeration = "sort_order::NullOrdering", tag = "3")] + pub null_ordering: i32, + } + /// Nested message and enum types in `SortOrder`. + pub mod sort_order { + #[derive( + Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, + )] + #[repr(i32)] + pub enum SortDirection { + Unspecified = 0, + Ascending = 1, + Descending = 2, + } + impl SortDirection { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "SORT_DIRECTION_UNSPECIFIED", + Self::Ascending => "SORT_DIRECTION_ASCENDING", + Self::Descending => "SORT_DIRECTION_DESCENDING", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. 
+ pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "SORT_DIRECTION_UNSPECIFIED" => Some(Self::Unspecified), + "SORT_DIRECTION_ASCENDING" => Some(Self::Ascending), + "SORT_DIRECTION_DESCENDING" => Some(Self::Descending), + _ => None, + } + } + } + #[derive( + Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, + )] + #[repr(i32)] + pub enum NullOrdering { + SortNullsUnspecified = 0, + SortNullsFirst = 1, + SortNullsLast = 2, + } + impl NullOrdering { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::SortNullsUnspecified => "SORT_NULLS_UNSPECIFIED", + Self::SortNullsFirst => "SORT_NULLS_FIRST", + Self::SortNullsLast => "SORT_NULLS_LAST", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "SORT_NULLS_UNSPECIFIED" => Some(Self::SortNullsUnspecified), + "SORT_NULLS_FIRST" => Some(Self::SortNullsFirst), + "SORT_NULLS_LAST" => Some(Self::SortNullsLast), + _ => None, + } + } + } + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Cast { + /// (Required) the expression to be casted. + #[prost(message, optional, boxed, tag = "1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) The expression evaluation mode. + #[prost(enumeration = "cast::EvalMode", tag = "4")] + pub eval_mode: i32, + /// (Required) the data type that the expr to be casted to. + #[prost(oneof = "cast::CastToType", tags = "2, 3")] + pub cast_to_type: ::core::option::Option, + } + /// Nested message and enum types in `Cast`. + pub mod cast { + #[derive( + Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, + )] + #[repr(i32)] + pub enum EvalMode { + Unspecified = 0, + Legacy = 1, + Ansi = 2, + Try = 3, + } + impl EvalMode { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "EVAL_MODE_UNSPECIFIED", + Self::Legacy => "EVAL_MODE_LEGACY", + Self::Ansi => "EVAL_MODE_ANSI", + Self::Try => "EVAL_MODE_TRY", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "EVAL_MODE_UNSPECIFIED" => Some(Self::Unspecified), + "EVAL_MODE_LEGACY" => Some(Self::Legacy), + "EVAL_MODE_ANSI" => Some(Self::Ansi), + "EVAL_MODE_TRY" => Some(Self::Try), + _ => None, + } + } + } + /// (Required) the data type that the expr to be casted to. + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum CastToType { + #[prost(message, tag = "2")] + Type(super::super::DataType), + /// If this is set, Server will use Catalyst parser to parse this string to DataType. 
+ #[prost(string, tag = "3")] + TypeStr(::prost::alloc::string::String), + } + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Literal { + #[prost( + oneof = "literal::LiteralType", + tags = "1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24" + )] + pub literal_type: ::core::option::Option, + } + /// Nested message and enum types in `Literal`. + pub mod literal { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Decimal { + /// the string representation. + #[prost(string, tag = "1")] + pub value: ::prost::alloc::string::String, + /// The maximum number of digits allowed in the value. + /// the maximum precision is 38. + #[prost(int32, optional, tag = "2")] + pub precision: ::core::option::Option, + /// declared scale of decimal literal + #[prost(int32, optional, tag = "3")] + pub scale: ::core::option::Option, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct CalendarInterval { + #[prost(int32, tag = "1")] + pub months: i32, + #[prost(int32, tag = "2")] + pub days: i32, + #[prost(int64, tag = "3")] + pub microseconds: i64, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Array { + #[prost(message, optional, tag = "1")] + pub element_type: ::core::option::Option, + #[prost(message, repeated, tag = "2")] + pub elements: ::prost::alloc::vec::Vec, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Map { + #[prost(message, optional, tag = "1")] + pub key_type: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub value_type: ::core::option::Option, + #[prost(message, repeated, tag = "3")] + pub keys: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "4")] + pub values: ::prost::alloc::vec::Vec, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Struct { + #[prost(message, optional, tag = "1")] + pub struct_type: ::core::option::Option, + #[prost(message, repeated, tag = "2")] + pub elements: ::prost::alloc::vec::Vec, + } + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum LiteralType { + #[prost(message, tag = "1")] + Null(super::super::DataType), + #[prost(bytes, tag = "2")] + Binary(::prost::alloc::vec::Vec), + #[prost(bool, tag = "3")] + Boolean(bool), + #[prost(int32, tag = "4")] + Byte(i32), + #[prost(int32, tag = "5")] + Short(i32), + #[prost(int32, tag = "6")] + Integer(i32), + #[prost(int64, tag = "7")] + Long(i64), + #[prost(float, tag = "10")] + Float(f32), + #[prost(double, tag = "11")] + Double(f64), + #[prost(message, tag = "12")] + Decimal(Decimal), + #[prost(string, tag = "13")] + String(::prost::alloc::string::String), + /// Date in units of days since the UNIX epoch. + #[prost(int32, tag = "16")] + Date(i32), + /// Timestamp in units of microseconds since the UNIX epoch. + #[prost(int64, tag = "17")] + Timestamp(i64), + /// Timestamp in units of microseconds since the UNIX epoch (without timezone information). + #[prost(int64, tag = "18")] + TimestampNtz(i64), + #[prost(message, tag = "19")] + CalendarInterval(CalendarInterval), + #[prost(int32, tag = "20")] + YearMonthInterval(i32), + #[prost(int64, tag = "21")] + DayTimeInterval(i64), + #[prost(message, tag = "22")] + Array(Array), + #[prost(message, tag = "23")] + Map(Map), + #[prost(message, tag = "24")] + Struct(Struct), + } + } + /// An unresolved attribute that is not explicitly bound to a specific column, but the column + /// is resolved during analysis by name. 
+ #[derive(Clone, PartialEq, ::prost::Message)] + pub struct UnresolvedAttribute { + /// (Required) An identifier that will be parsed by Catalyst parser. This should follow the + /// Spark SQL identifier syntax. + #[prost(string, tag = "1")] + pub unparsed_identifier: ::prost::alloc::string::String, + /// (Optional) The id of corresponding connect plan. + #[prost(int64, optional, tag = "2")] + pub plan_id: ::core::option::Option, + /// (Optional) The requested column is a metadata column. + #[prost(bool, optional, tag = "3")] + pub is_metadata_column: ::core::option::Option, + } + /// An unresolved function is not explicitly bound to one explicit function, but the function + /// is resolved during analysis following Sparks name resolution rules. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct UnresolvedFunction { + /// (Required) name (or unparsed name for user defined function) for the unresolved function. + #[prost(string, tag = "1")] + pub function_name: ::prost::alloc::string::String, + /// (Optional) Function arguments. Empty arguments are allowed. + #[prost(message, repeated, tag = "2")] + pub arguments: ::prost::alloc::vec::Vec, + /// (Required) Indicate if this function should be applied on distinct values. + #[prost(bool, tag = "3")] + pub is_distinct: bool, + /// (Required) Indicate if this is a user defined function. + /// + /// When it is not a user defined function, Connect will use the function name directly. + /// When it is a user defined function, Connect will parse the function name first. + #[prost(bool, tag = "4")] + pub is_user_defined_function: bool, + } + /// Expression as string. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct ExpressionString { + /// (Required) A SQL expression that will be parsed by Catalyst parser. + #[prost(string, tag = "1")] + pub expression: ::prost::alloc::string::String, + } + /// UnresolvedStar is used to expand all the fields of a relation or struct. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct UnresolvedStar { + /// (Optional) The target of the expansion. + /// + /// If set, it should end with '.*' and will be parsed by 'parseAttributeName' + /// in the server side. + #[prost(string, optional, tag = "1")] + pub unparsed_target: ::core::option::Option<::prost::alloc::string::String>, + /// (Optional) The id of corresponding connect plan. + #[prost(int64, optional, tag = "2")] + pub plan_id: ::core::option::Option, + } + /// Represents all of the input attributes to a given relational operator, for example in + /// "SELECT `(id)?+.+` FROM ...". + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct UnresolvedRegex { + /// (Required) The column name used to extract column with regex. + #[prost(string, tag = "1")] + pub col_name: ::prost::alloc::string::String, + /// (Optional) The id of corresponding connect plan. + #[prost(int64, optional, tag = "2")] + pub plan_id: ::core::option::Option, + } + /// Extracts a value or values from an Expression + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct UnresolvedExtractValue { + /// (Required) The expression to extract value from, can be + /// Map, Array, Struct or array of Structs. + #[prost(message, optional, boxed, tag = "1")] + pub child: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The expression to describe the extraction, can be + /// key of Map, index of Array, field name of Struct. 
+ #[prost(message, optional, boxed, tag = "2")] + pub extraction: ::core::option::Option<::prost::alloc::boxed::Box>, + } + /// Add, replace or drop a field of `StructType` expression by name. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct UpdateFields { + /// (Required) The struct expression. + #[prost(message, optional, boxed, tag = "1")] + pub struct_expression: + ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The field name. + #[prost(string, tag = "2")] + pub field_name: ::prost::alloc::string::String, + /// (Optional) The expression to add or replace. + /// + /// When not set, it means this field will be dropped. + #[prost(message, optional, boxed, tag = "3")] + pub value_expression: ::core::option::Option<::prost::alloc::boxed::Box>, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Alias { + /// (Required) The expression that alias will be added on. + #[prost(message, optional, boxed, tag = "1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) a list of name parts for the alias. + /// + /// Scalar columns only has one name that presents. + #[prost(string, repeated, tag = "2")] + pub name: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// (Optional) Alias metadata expressed as a JSON map. + #[prost(string, optional, tag = "3")] + pub metadata: ::core::option::Option<::prost::alloc::string::String>, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct LambdaFunction { + /// (Required) The lambda function. + /// + /// The function body should use 'UnresolvedAttribute' as arguments, the sever side will + /// replace 'UnresolvedAttribute' with 'UnresolvedNamedLambdaVariable'. + #[prost(message, optional, boxed, tag = "1")] + pub function: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Function variables. Must contains 1 ~ 3 variables. + #[prost(message, repeated, tag = "2")] + pub arguments: ::prost::alloc::vec::Vec, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct UnresolvedNamedLambdaVariable { + /// (Required) a list of name parts for the variable. Must not be empty. 
+ #[prost(string, repeated, tag = "1")] + pub name_parts: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + } + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum ExprType { + #[prost(message, tag = "1")] + Literal(Literal), + #[prost(message, tag = "2")] + UnresolvedAttribute(UnresolvedAttribute), + #[prost(message, tag = "3")] + UnresolvedFunction(UnresolvedFunction), + #[prost(message, tag = "4")] + ExpressionString(ExpressionString), + #[prost(message, tag = "5")] + UnresolvedStar(UnresolvedStar), + #[prost(message, tag = "6")] + Alias(::prost::alloc::boxed::Box), + #[prost(message, tag = "7")] + Cast(::prost::alloc::boxed::Box), + #[prost(message, tag = "8")] + UnresolvedRegex(UnresolvedRegex), + #[prost(message, tag = "9")] + SortOrder(::prost::alloc::boxed::Box), + #[prost(message, tag = "10")] + LambdaFunction(::prost::alloc::boxed::Box), + #[prost(message, tag = "11")] + Window(::prost::alloc::boxed::Box), + #[prost(message, tag = "12")] + UnresolvedExtractValue(::prost::alloc::boxed::Box), + #[prost(message, tag = "13")] + UpdateFields(::prost::alloc::boxed::Box), + #[prost(message, tag = "14")] + UnresolvedNamedLambdaVariable(UnresolvedNamedLambdaVariable), + #[prost(message, tag = "15")] + CommonInlineUserDefinedFunction(super::CommonInlineUserDefinedFunction), + #[prost(message, tag = "16")] + CallFunction(super::CallFunction), + #[prost(message, tag = "17")] + NamedArgumentExpression(::prost::alloc::boxed::Box), + #[prost(message, tag = "19")] + MergeAction(::prost::alloc::boxed::Box), + #[prost(message, tag = "20")] + TypedAggregateExpression(super::TypedAggregateExpression), + /// This field is used to mark extensions to the protocol. When plugins generate arbitrary + /// relations they can add them here. During the planning the correct resolution is done. + #[prost(message, tag = "999")] + Extension(::prost_types::Any), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExpressionCommon { + /// (Required) Keep the information of the origin for this expression such as stacktrace. + #[prost(message, optional, tag = "1")] + pub origin: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CommonInlineUserDefinedFunction { + /// (Required) Name of the user-defined function. + #[prost(string, tag = "1")] + pub function_name: ::prost::alloc::string::String, + /// (Optional) Indicate if the user-defined function is deterministic. + #[prost(bool, tag = "2")] + pub deterministic: bool, + /// (Optional) Function arguments. Empty arguments are allowed. + #[prost(message, repeated, tag = "3")] + pub arguments: ::prost::alloc::vec::Vec, + /// (Required) Indicate the function type of the user-defined function. + #[prost( + oneof = "common_inline_user_defined_function::Function", + tags = "4, 5, 6" + )] + pub function: ::core::option::Option, +} +/// Nested message and enum types in `CommonInlineUserDefinedFunction`. +pub mod common_inline_user_defined_function { + /// (Required) Indicate the function type of the user-defined function. 
+ #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Function { + #[prost(message, tag = "4")] + PythonUdf(super::PythonUdf), + #[prost(message, tag = "5")] + ScalarScalaUdf(super::ScalarScalaUdf), + #[prost(message, tag = "6")] + JavaUdf(super::JavaUdf), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PythonUdf { + /// (Required) Output type of the Python UDF + #[prost(message, optional, tag = "1")] + pub output_type: ::core::option::Option, + /// (Required) EvalType of the Python UDF + #[prost(int32, tag = "2")] + pub eval_type: i32, + /// (Required) The encoded commands of the Python UDF + #[prost(bytes = "vec", tag = "3")] + pub command: ::prost::alloc::vec::Vec, + /// (Required) Python version being used in the client. + #[prost(string, tag = "4")] + pub python_ver: ::prost::alloc::string::String, + /// (Optional) Additional includes for the Python UDF. + #[prost(string, repeated, tag = "5")] + pub additional_includes: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ScalarScalaUdf { + /// (Required) Serialized JVM object containing UDF definition, input encoders and output encoder + #[prost(bytes = "vec", tag = "1")] + pub payload: ::prost::alloc::vec::Vec, + /// (Optional) Input type(s) of the UDF + #[prost(message, repeated, tag = "2")] + pub input_types: ::prost::alloc::vec::Vec, + /// (Required) Output type of the UDF + #[prost(message, optional, tag = "3")] + pub output_type: ::core::option::Option, + /// (Required) True if the UDF can return null value + #[prost(bool, tag = "4")] + pub nullable: bool, + /// (Required) Indicate if the UDF is an aggregate function + #[prost(bool, tag = "5")] + pub aggregate: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct JavaUdf { + /// (Required) Fully qualified name of Java class + #[prost(string, tag = "1")] + pub class_name: ::prost::alloc::string::String, + /// (Optional) Output type of the Java UDF + #[prost(message, optional, tag = "2")] + pub output_type: ::core::option::Option, + /// (Required) Indicate if the Java user-defined function is an aggregate function + #[prost(bool, tag = "3")] + pub aggregate: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TypedAggregateExpression { + /// (Required) The aggregate function object packed into bytes. + #[prost(message, optional, tag = "1")] + pub scalar_scala_udf: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CallFunction { + /// (Required) Unparsed name of the SQL function. + #[prost(string, tag = "1")] + pub function_name: ::prost::alloc::string::String, + /// (Optional) Function arguments. Empty arguments are allowed. + #[prost(message, repeated, tag = "2")] + pub arguments: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct NamedArgumentExpression { + /// (Required) The key of the named argument. + #[prost(string, tag = "1")] + pub key: ::prost::alloc::string::String, + /// (Required) The value expression of the named argument. + #[prost(message, optional, boxed, tag = "2")] + pub value: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct MergeAction { + /// (Required) The action type of the merge action. + #[prost(enumeration = "merge_action::ActionType", tag = "1")] + pub action_type: i32, + /// (Optional) The condition expression of the merge action. 
+ #[prost(message, optional, boxed, tag = "2")] + pub condition: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) The assignments of the merge action. Required for ActionTypes INSERT and UPDATE. + #[prost(message, repeated, tag = "3")] + pub assignments: ::prost::alloc::vec::Vec, +} +/// Nested message and enum types in `MergeAction`. +pub mod merge_action { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Assignment { + /// (Required) The key of the assignment. + #[prost(message, optional, tag = "1")] + pub key: ::core::option::Option, + /// (Required) The value of the assignment. + #[prost(message, optional, tag = "2")] + pub value: ::core::option::Option, + } + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] + #[repr(i32)] + pub enum ActionType { + Invalid = 0, + Delete = 1, + Insert = 2, + InsertStar = 3, + Update = 4, + UpdateStar = 5, + } + impl ActionType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Invalid => "ACTION_TYPE_INVALID", + Self::Delete => "ACTION_TYPE_DELETE", + Self::Insert => "ACTION_TYPE_INSERT", + Self::InsertStar => "ACTION_TYPE_INSERT_STAR", + Self::Update => "ACTION_TYPE_UPDATE", + Self::UpdateStar => "ACTION_TYPE_UPDATE_STAR", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "ACTION_TYPE_INVALID" => Some(Self::Invalid), + "ACTION_TYPE_DELETE" => Some(Self::Delete), + "ACTION_TYPE_INSERT" => Some(Self::Insert), + "ACTION_TYPE_INSERT_STAR" => Some(Self::InsertStar), + "ACTION_TYPE_UPDATE" => Some(Self::Update), + "ACTION_TYPE_UPDATE_STAR" => Some(Self::UpdateStar), + _ => None, + } + } + } +} +/// Catalog messages are marked as unstable. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Catalog { + #[prost( + oneof = "catalog::CatType", + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26" + )] + pub cat_type: ::core::option::Option, +} +/// Nested message and enum types in `Catalog`. 
+pub mod catalog { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum CatType { + #[prost(message, tag = "1")] + CurrentDatabase(super::CurrentDatabase), + #[prost(message, tag = "2")] + SetCurrentDatabase(super::SetCurrentDatabase), + #[prost(message, tag = "3")] + ListDatabases(super::ListDatabases), + #[prost(message, tag = "4")] + ListTables(super::ListTables), + #[prost(message, tag = "5")] + ListFunctions(super::ListFunctions), + #[prost(message, tag = "6")] + ListColumns(super::ListColumns), + #[prost(message, tag = "7")] + GetDatabase(super::GetDatabase), + #[prost(message, tag = "8")] + GetTable(super::GetTable), + #[prost(message, tag = "9")] + GetFunction(super::GetFunction), + #[prost(message, tag = "10")] + DatabaseExists(super::DatabaseExists), + #[prost(message, tag = "11")] + TableExists(super::TableExists), + #[prost(message, tag = "12")] + FunctionExists(super::FunctionExists), + #[prost(message, tag = "13")] + CreateExternalTable(super::CreateExternalTable), + #[prost(message, tag = "14")] + CreateTable(super::CreateTable), + #[prost(message, tag = "15")] + DropTempView(super::DropTempView), + #[prost(message, tag = "16")] + DropGlobalTempView(super::DropGlobalTempView), + #[prost(message, tag = "17")] + RecoverPartitions(super::RecoverPartitions), + #[prost(message, tag = "18")] + IsCached(super::IsCached), + #[prost(message, tag = "19")] + CacheTable(super::CacheTable), + #[prost(message, tag = "20")] + UncacheTable(super::UncacheTable), + #[prost(message, tag = "21")] + ClearCache(super::ClearCache), + #[prost(message, tag = "22")] + RefreshTable(super::RefreshTable), + #[prost(message, tag = "23")] + RefreshByPath(super::RefreshByPath), + #[prost(message, tag = "24")] + CurrentCatalog(super::CurrentCatalog), + #[prost(message, tag = "25")] + SetCurrentCatalog(super::SetCurrentCatalog), + #[prost(message, tag = "26")] + ListCatalogs(super::ListCatalogs), + } +} +/// See `spark.catalog.currentDatabase` +#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct CurrentDatabase {} +/// See `spark.catalog.setCurrentDatabase` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SetCurrentDatabase { + /// (Required) + #[prost(string, tag = "1")] + pub db_name: ::prost::alloc::string::String, +} +/// See `spark.catalog.listDatabases` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ListDatabases { + /// (Optional) The pattern that the database name needs to match + #[prost(string, optional, tag = "1")] + pub pattern: ::core::option::Option<::prost::alloc::string::String>, +} +/// See `spark.catalog.listTables` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ListTables { + /// (Optional) + #[prost(string, optional, tag = "1")] + pub db_name: ::core::option::Option<::prost::alloc::string::String>, + /// (Optional) The pattern that the table name needs to match + #[prost(string, optional, tag = "2")] + pub pattern: ::core::option::Option<::prost::alloc::string::String>, +} +/// See `spark.catalog.listFunctions` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ListFunctions { + /// (Optional) + #[prost(string, optional, tag = "1")] + pub db_name: ::core::option::Option<::prost::alloc::string::String>, + /// (Optional) The pattern that the function name needs to match + #[prost(string, optional, tag = "2")] + pub pattern: ::core::option::Option<::prost::alloc::string::String>, +} +/// See `spark.catalog.listColumns` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ListColumns { + /// (Required) + #[prost(string, tag = 
"1")] + pub table_name: ::prost::alloc::string::String, + /// (Optional) + #[prost(string, optional, tag = "2")] + pub db_name: ::core::option::Option<::prost::alloc::string::String>, +} +/// See `spark.catalog.getDatabase` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetDatabase { + /// (Required) + #[prost(string, tag = "1")] + pub db_name: ::prost::alloc::string::String, +} +/// See `spark.catalog.getTable` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetTable { + /// (Required) + #[prost(string, tag = "1")] + pub table_name: ::prost::alloc::string::String, + /// (Optional) + #[prost(string, optional, tag = "2")] + pub db_name: ::core::option::Option<::prost::alloc::string::String>, +} +/// See `spark.catalog.getFunction` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetFunction { + /// (Required) + #[prost(string, tag = "1")] + pub function_name: ::prost::alloc::string::String, + /// (Optional) + #[prost(string, optional, tag = "2")] + pub db_name: ::core::option::Option<::prost::alloc::string::String>, +} +/// See `spark.catalog.databaseExists` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DatabaseExists { + /// (Required) + #[prost(string, tag = "1")] + pub db_name: ::prost::alloc::string::String, +} +/// See `spark.catalog.tableExists` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TableExists { + /// (Required) + #[prost(string, tag = "1")] + pub table_name: ::prost::alloc::string::String, + /// (Optional) + #[prost(string, optional, tag = "2")] + pub db_name: ::core::option::Option<::prost::alloc::string::String>, +} +/// See `spark.catalog.functionExists` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FunctionExists { + /// (Required) + #[prost(string, tag = "1")] + pub function_name: ::prost::alloc::string::String, + /// (Optional) + #[prost(string, optional, tag = "2")] + pub db_name: ::core::option::Option<::prost::alloc::string::String>, +} +/// See `spark.catalog.createExternalTable` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CreateExternalTable { + /// (Required) + #[prost(string, tag = "1")] + pub table_name: ::prost::alloc::string::String, + /// (Optional) + #[prost(string, optional, tag = "2")] + pub path: ::core::option::Option<::prost::alloc::string::String>, + /// (Optional) + #[prost(string, optional, tag = "3")] + pub source: ::core::option::Option<::prost::alloc::string::String>, + /// (Optional) + #[prost(message, optional, tag = "4")] + pub schema: ::core::option::Option, + /// Options could be empty for valid data source format. + /// The map key is case insensitive. + #[prost(map = "string, string", tag = "5")] + pub options: + ::std::collections::HashMap<::prost::alloc::string::String, ::prost::alloc::string::String>, +} +/// See `spark.catalog.createTable` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CreateTable { + /// (Required) + #[prost(string, tag = "1")] + pub table_name: ::prost::alloc::string::String, + /// (Optional) + #[prost(string, optional, tag = "2")] + pub path: ::core::option::Option<::prost::alloc::string::String>, + /// (Optional) + #[prost(string, optional, tag = "3")] + pub source: ::core::option::Option<::prost::alloc::string::String>, + /// (Optional) + #[prost(string, optional, tag = "4")] + pub description: ::core::option::Option<::prost::alloc::string::String>, + /// (Optional) + #[prost(message, optional, tag = "5")] + pub schema: ::core::option::Option, + /// Options could be empty for valid data source format. 
+ /// The map key is case insensitive. + #[prost(map = "string, string", tag = "6")] + pub options: + ::std::collections::HashMap<::prost::alloc::string::String, ::prost::alloc::string::String>, +} +/// See `spark.catalog.dropTempView` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DropTempView { + /// (Required) + #[prost(string, tag = "1")] + pub view_name: ::prost::alloc::string::String, +} +/// See `spark.catalog.dropGlobalTempView` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DropGlobalTempView { + /// (Required) + #[prost(string, tag = "1")] + pub view_name: ::prost::alloc::string::String, +} +/// See `spark.catalog.recoverPartitions` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RecoverPartitions { + /// (Required) + #[prost(string, tag = "1")] + pub table_name: ::prost::alloc::string::String, +} +/// See `spark.catalog.isCached` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct IsCached { + /// (Required) + #[prost(string, tag = "1")] + pub table_name: ::prost::alloc::string::String, +} +/// See `spark.catalog.cacheTable` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CacheTable { + /// (Required) + #[prost(string, tag = "1")] + pub table_name: ::prost::alloc::string::String, + /// (Optional) + #[prost(message, optional, tag = "2")] + pub storage_level: ::core::option::Option, +} +/// See `spark.catalog.uncacheTable` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct UncacheTable { + /// (Required) + #[prost(string, tag = "1")] + pub table_name: ::prost::alloc::string::String, +} +/// See `spark.catalog.clearCache` +#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct ClearCache {} +/// See `spark.catalog.refreshTable` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RefreshTable { + /// (Required) + #[prost(string, tag = "1")] + pub table_name: ::prost::alloc::string::String, +} +/// See `spark.catalog.refreshByPath` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RefreshByPath { + /// (Required) + #[prost(string, tag = "1")] + pub path: ::prost::alloc::string::String, +} +/// See `spark.catalog.currentCatalog` +#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct CurrentCatalog {} +/// See `spark.catalog.setCurrentCatalog` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SetCurrentCatalog { + /// (Required) + #[prost(string, tag = "1")] + pub catalog_name: ::prost::alloc::string::String, +} +/// See `spark.catalog.listCatalogs` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ListCatalogs { + /// (Optional) The pattern that the catalog name needs to match + #[prost(string, optional, tag = "1")] + pub pattern: ::core::option::Option<::prost::alloc::string::String>, +} +/// The main \[[Relation]\] type. Fundamentally, a relation is a typed container +/// that has exactly one explicit relation type set. +/// +/// When adding new relation types, they have to be registered here. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Relation { + #[prost(message, optional, tag = "1")] + pub common: ::core::option::Option, + #[prost( + oneof = "relation::RelType", + tags = "2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 90, 91, 92, 100, 101, 102, 103, 104, 105, 106, 107, 200, 998, 999" + )] + pub rel_type: ::core::option::Option, +} +/// Nested message and enum types in `Relation`. 
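+///
+/// As a rough sketch only, assuming the `Relation` and `Sql` messages generated in this file
+/// and the `Default` impl provided by the prost `Message` derive, a SQL query can be wrapped
+/// in a relation like this:
+///
+/// ```ignore
+/// let rel = Relation {
+///     common: None,
+///     rel_type: Some(relation::RelType::Sql(Sql {
+///         query: "SELECT 1 AS one".to_string(),
+///         ..Default::default()
+///     })),
+/// };
+/// ```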
+pub mod relation { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum RelType { + #[prost(message, tag = "2")] + Read(super::Read), + #[prost(message, tag = "3")] + Project(::prost::alloc::boxed::Box), + #[prost(message, tag = "4")] + Filter(::prost::alloc::boxed::Box), + #[prost(message, tag = "5")] + Join(::prost::alloc::boxed::Box), + #[prost(message, tag = "6")] + SetOp(::prost::alloc::boxed::Box), + #[prost(message, tag = "7")] + Sort(::prost::alloc::boxed::Box), + #[prost(message, tag = "8")] + Limit(::prost::alloc::boxed::Box), + #[prost(message, tag = "9")] + Aggregate(::prost::alloc::boxed::Box), + #[prost(message, tag = "10")] + Sql(super::Sql), + #[prost(message, tag = "11")] + LocalRelation(super::LocalRelation), + #[prost(message, tag = "12")] + Sample(::prost::alloc::boxed::Box), + #[prost(message, tag = "13")] + Offset(::prost::alloc::boxed::Box), + #[prost(message, tag = "14")] + Deduplicate(::prost::alloc::boxed::Box), + #[prost(message, tag = "15")] + Range(super::Range), + #[prost(message, tag = "16")] + SubqueryAlias(::prost::alloc::boxed::Box), + #[prost(message, tag = "17")] + Repartition(::prost::alloc::boxed::Box), + #[prost(message, tag = "18")] + ToDf(::prost::alloc::boxed::Box), + #[prost(message, tag = "19")] + WithColumnsRenamed(::prost::alloc::boxed::Box), + #[prost(message, tag = "20")] + ShowString(::prost::alloc::boxed::Box), + #[prost(message, tag = "21")] + Drop(::prost::alloc::boxed::Box), + #[prost(message, tag = "22")] + Tail(::prost::alloc::boxed::Box), + #[prost(message, tag = "23")] + WithColumns(::prost::alloc::boxed::Box), + #[prost(message, tag = "24")] + Hint(::prost::alloc::boxed::Box), + #[prost(message, tag = "25")] + Unpivot(::prost::alloc::boxed::Box), + #[prost(message, tag = "26")] + ToSchema(::prost::alloc::boxed::Box), + #[prost(message, tag = "27")] + RepartitionByExpression(::prost::alloc::boxed::Box), + #[prost(message, tag = "28")] + MapPartitions(::prost::alloc::boxed::Box), + #[prost(message, tag = "29")] + CollectMetrics(::prost::alloc::boxed::Box), + #[prost(message, tag = "30")] + Parse(::prost::alloc::boxed::Box), + #[prost(message, tag = "31")] + GroupMap(::prost::alloc::boxed::Box), + #[prost(message, tag = "32")] + CoGroupMap(::prost::alloc::boxed::Box), + #[prost(message, tag = "33")] + WithWatermark(::prost::alloc::boxed::Box), + #[prost(message, tag = "34")] + ApplyInPandasWithState(::prost::alloc::boxed::Box), + #[prost(message, tag = "35")] + HtmlString(::prost::alloc::boxed::Box), + #[prost(message, tag = "36")] + CachedLocalRelation(super::CachedLocalRelation), + #[prost(message, tag = "37")] + CachedRemoteRelation(super::CachedRemoteRelation), + #[prost(message, tag = "38")] + CommonInlineUserDefinedTableFunction(super::CommonInlineUserDefinedTableFunction), + #[prost(message, tag = "39")] + AsOfJoin(::prost::alloc::boxed::Box), + #[prost(message, tag = "40")] + CommonInlineUserDefinedDataSource(super::CommonInlineUserDefinedDataSource), + #[prost(message, tag = "41")] + WithRelations(::prost::alloc::boxed::Box), + #[prost(message, tag = "42")] + Transpose(::prost::alloc::boxed::Box), + /// NA functions + #[prost(message, tag = "90")] + FillNa(::prost::alloc::boxed::Box), + #[prost(message, tag = "91")] + DropNa(::prost::alloc::boxed::Box), + #[prost(message, tag = "92")] + Replace(::prost::alloc::boxed::Box), + /// stat functions + #[prost(message, tag = "100")] + Summary(::prost::alloc::boxed::Box), + #[prost(message, tag = "101")] + Crosstab(::prost::alloc::boxed::Box), + #[prost(message, tag = "102")] + 
        Describe(::prost::alloc::boxed::Box<super::StatDescribe>),
+        #[prost(message, tag = "103")]
+        Cov(::prost::alloc::boxed::Box<super::StatCov>),
+        #[prost(message, tag = "104")]
+        Corr(::prost::alloc::boxed::Box<super::StatCorr>),
+        #[prost(message, tag = "105")]
+        ApproxQuantile(::prost::alloc::boxed::Box<super::StatApproxQuantile>),
+        #[prost(message, tag = "106")]
+        FreqItems(::prost::alloc::boxed::Box<super::StatFreqItems>),
+        #[prost(message, tag = "107")]
+        SampleBy(::prost::alloc::boxed::Box<super::StatSampleBy>),
+        /// Catalog API (experimental / unstable)
+        #[prost(message, tag = "200")]
+        Catalog(super::Catalog),
+        /// This field is used to mark extensions to the protocol. When plugins generate arbitrary
+        /// relations they can add them here. During the planning the correct resolution is done.
+        #[prost(message, tag = "998")]
+        Extension(::prost_types::Any),
+        #[prost(message, tag = "999")]
+        Unknown(super::Unknown),
+    }
+}
+/// Used for testing purposes only.
+#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+pub struct Unknown {}
+/// Common metadata of all relations.
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct RelationCommon {
+    /// (Required) Shared relation metadata.
+    #[deprecated]
+    #[prost(string, tag = "1")]
+    pub source_info: ::prost::alloc::string::String,
+    /// (Optional) A per-client globally unique id for a given connect plan.
+    #[prost(int64, optional, tag = "2")]
+    pub plan_id: ::core::option::Option<i64>,
+    /// (Optional) Keep the information of the origin for this expression such as stacktrace.
+    #[prost(message, optional, tag = "3")]
+    pub origin: ::core::option::Option<Origin>,
+}
+/// Relation that uses a SQL query to generate the output.
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct Sql {
+    /// (Required) The SQL query.
+    #[prost(string, tag = "1")]
+    pub query: ::prost::alloc::string::String,
+    /// (Optional) A map of parameter names to literal expressions.
+    #[prost(map = "string, message", tag = "2")]
+    pub args: ::std::collections::HashMap<::prost::alloc::string::String, expression::Literal>,
+    /// (Optional) A sequence of literal expressions for positional parameters in the SQL query text.
+    #[deprecated]
+    #[prost(message, repeated, tag = "3")]
+    pub pos_args: ::prost::alloc::vec::Vec<expression::Literal>,
+    /// (Optional) A map of parameter names to expressions.
+    /// It cannot coexist with `pos_arguments`.
+    #[prost(map = "string, message", tag = "4")]
+    pub named_arguments: ::std::collections::HashMap<::prost::alloc::string::String, Expression>,
+    /// (Optional) A sequence of expressions for positional parameters in the SQL query text.
+    /// It cannot coexist with `named_arguments`.
+    #[prost(message, repeated, tag = "5")]
+    pub pos_arguments: ::prost::alloc::vec::Vec<Expression>,
+}
+/// Relation of type \[[WithRelations]\].
+///
+/// This relation contains a root plan, and one or more references that are used by the root plan.
+/// There are two ways of referencing a relation, by name (through a subquery alias), or by plan_id
+/// (using RelationCommon.plan_id).
+///
+/// This relation can be used to implement CTEs, describe DAGs, or to reduce tree depth.
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct WithRelations {
+    /// (Required) Plan at the root of the query tree. This plan is expected to contain one or more
+    /// references. Those references get expanded later on by the engine.
+    #[prost(message, optional, boxed, tag = "1")]
+    pub root: ::core::option::Option<::prost::alloc::boxed::Box<Relation>>,
+    /// (Required) Plans referenced by the root plan. Relations in this list are also allowed to
+    /// contain references to other relations in this list, as long they do not form cycles.
+    #[prost(message, repeated, tag = "2")]
+    pub references: ::prost::alloc::vec::Vec<Relation>,
+}
+/// Relation that reads from a file / table or other data source. Does not have additional
+/// inputs.
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct Read {
+    /// (Optional) Indicates if this is a streaming read.
+    #[prost(bool, tag = "3")]
+    pub is_streaming: bool,
+    #[prost(oneof = "read::ReadType", tags = "1, 2")]
+    pub read_type: ::core::option::Option<read::ReadType>,
+}
+/// Nested message and enum types in `Read`.
+pub mod read {
+    #[derive(Clone, PartialEq, ::prost::Message)]
+    pub struct NamedTable {
+        /// (Required) Unparsed identifier for the table.
+        #[prost(string, tag = "1")]
+        pub unparsed_identifier: ::prost::alloc::string::String,
+        /// Options for the named table. The map key is case insensitive.
+        #[prost(map = "string, string", tag = "2")]
+        pub options: ::std::collections::HashMap<
+            ::prost::alloc::string::String,
+            ::prost::alloc::string::String,
+        >,
+    }
+    #[derive(Clone, PartialEq, ::prost::Message)]
+    pub struct DataSource {
+        /// (Optional) Supported formats include: parquet, orc, text, json, parquet, csv, avro.
+        ///
+        /// If not set, the value from SQL conf 'spark.sql.sources.default' will be used.
+        #[prost(string, optional, tag = "1")]
+        pub format: ::core::option::Option<::prost::alloc::string::String>,
+        /// (Optional) If not set, Spark will infer the schema.
+        ///
+        /// This schema string should be either DDL-formatted or JSON-formatted.
+        #[prost(string, optional, tag = "2")]
+        pub schema: ::core::option::Option<::prost::alloc::string::String>,
+        /// Options for the data source. The context of this map varies based on the
+        /// data source format. This options could be empty for valid data source format.
+        /// The map key is case insensitive.
+        #[prost(map = "string, string", tag = "3")]
+        pub options: ::std::collections::HashMap<
+            ::prost::alloc::string::String,
+            ::prost::alloc::string::String,
+        >,
+        /// (Optional) A list of path for file-system backed data sources.
+        #[prost(string, repeated, tag = "4")]
+        pub paths: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
+        /// (Optional) Condition in the where clause for each partition.
+        ///
+        /// This is only supported by the JDBC data source.
+        #[prost(string, repeated, tag = "5")]
+        pub predicates: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
+    }
+    #[derive(Clone, PartialEq, ::prost::Oneof)]
+    pub enum ReadType {
+        #[prost(message, tag = "1")]
+        NamedTable(NamedTable),
+        #[prost(message, tag = "2")]
+        DataSource(DataSource),
+    }
+}
+/// Projection of a bag of expressions for a given input relation.
+///
+/// The input relation must be specified.
+/// The projected expression can be an arbitrary expression.
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct Project {
+    /// (Optional) Input relation is optional for Project.
+    ///
+    /// For example, `SELECT ABS(-1)` is valid plan without an input plan.
+    #[prost(message, optional, boxed, tag = "1")]
+    pub input: ::core::option::Option<::prost::alloc::boxed::Box<Relation>>,
+    /// (Required) A Project requires at least one expression.
+    #[prost(message, repeated, tag = "3")]
+    pub expressions: ::prost::alloc::vec::Vec<Expression>,
+}
+/// Relation that applies a boolean expression `condition` on each row of `input` to produce
+/// the output result.
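+///
+/// As a rough sketch only, assuming `read_rel: Relation` and `predicate: Expression` have
+/// already been built from the types generated in this file, a filter node could be
+/// assembled as:
+///
+/// ```ignore
+/// let filtered = Relation {
+///     common: None,
+///     rel_type: Some(relation::RelType::Filter(Box::new(Filter {
+///         input: Some(Box::new(read_rel)),
+///         condition: Some(predicate),
+///     }))),
+/// };
+/// ```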
+#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Filter { + /// (Required) Input relation for a Filter. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) A Filter must have a condition expression. + #[prost(message, optional, tag = "2")] + pub condition: ::core::option::Option, +} +/// Relation of type \[[Join]\]. +/// +/// `left` and `right` must be present. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Join { + /// (Required) Left input relation for a Join. + #[prost(message, optional, boxed, tag = "1")] + pub left: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Right input relation for a Join. + #[prost(message, optional, boxed, tag = "2")] + pub right: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) The join condition. Could be unset when `using_columns` is utilized. + /// + /// This field does not co-exist with using_columns. + #[prost(message, optional, tag = "3")] + pub join_condition: ::core::option::Option, + /// (Required) The join type. + #[prost(enumeration = "join::JoinType", tag = "4")] + pub join_type: i32, + /// Optional. using_columns provides a list of columns that should present on both sides of + /// the join inputs that this Join will join on. For example A JOIN B USING col_name is + /// equivalent to A JOIN B on A.col_name = B.col_name. + /// + /// This field does not co-exist with join_condition. + #[prost(string, repeated, tag = "5")] + pub using_columns: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// (Optional) Only used by joinWith. Set the left and right join data types. + #[prost(message, optional, tag = "6")] + pub join_data_type: ::core::option::Option, +} +/// Nested message and enum types in `Join`. +pub mod join { + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct JoinDataType { + /// If the left data type is a struct. + #[prost(bool, tag = "1")] + pub is_left_struct: bool, + /// If the right data type is a struct. + #[prost(bool, tag = "2")] + pub is_right_struct: bool, + } + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] + #[repr(i32)] + pub enum JoinType { + Unspecified = 0, + Inner = 1, + FullOuter = 2, + LeftOuter = 3, + RightOuter = 4, + LeftAnti = 5, + LeftSemi = 6, + Cross = 7, + } + impl JoinType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "JOIN_TYPE_UNSPECIFIED", + Self::Inner => "JOIN_TYPE_INNER", + Self::FullOuter => "JOIN_TYPE_FULL_OUTER", + Self::LeftOuter => "JOIN_TYPE_LEFT_OUTER", + Self::RightOuter => "JOIN_TYPE_RIGHT_OUTER", + Self::LeftAnti => "JOIN_TYPE_LEFT_ANTI", + Self::LeftSemi => "JOIN_TYPE_LEFT_SEMI", + Self::Cross => "JOIN_TYPE_CROSS", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. 
+ pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "JOIN_TYPE_UNSPECIFIED" => Some(Self::Unspecified), + "JOIN_TYPE_INNER" => Some(Self::Inner), + "JOIN_TYPE_FULL_OUTER" => Some(Self::FullOuter), + "JOIN_TYPE_LEFT_OUTER" => Some(Self::LeftOuter), + "JOIN_TYPE_RIGHT_OUTER" => Some(Self::RightOuter), + "JOIN_TYPE_LEFT_ANTI" => Some(Self::LeftAnti), + "JOIN_TYPE_LEFT_SEMI" => Some(Self::LeftSemi), + "JOIN_TYPE_CROSS" => Some(Self::Cross), + _ => None, + } + } + } +} +/// Relation of type \[[SetOperation]\] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SetOperation { + /// (Required) Left input relation for a Set operation. + #[prost(message, optional, boxed, tag = "1")] + pub left_input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Right input relation for a Set operation. + #[prost(message, optional, boxed, tag = "2")] + pub right_input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The Set operation type. + #[prost(enumeration = "set_operation::SetOpType", tag = "3")] + pub set_op_type: i32, + /// (Optional) If to remove duplicate rows. + /// + /// True to preserve all results. + /// False to remove duplicate rows. + #[prost(bool, optional, tag = "4")] + pub is_all: ::core::option::Option, + /// (Optional) If to perform the Set operation based on name resolution. + /// + /// Only UNION supports this option. + #[prost(bool, optional, tag = "5")] + pub by_name: ::core::option::Option, + /// (Optional) If to perform the Set operation and allow missing columns. + /// + /// Only UNION supports this option. + #[prost(bool, optional, tag = "6")] + pub allow_missing_columns: ::core::option::Option, +} +/// Nested message and enum types in `SetOperation`. +pub mod set_operation { + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] + #[repr(i32)] + pub enum SetOpType { + Unspecified = 0, + Intersect = 1, + Union = 2, + Except = 3, + } + impl SetOpType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "SET_OP_TYPE_UNSPECIFIED", + Self::Intersect => "SET_OP_TYPE_INTERSECT", + Self::Union => "SET_OP_TYPE_UNION", + Self::Except => "SET_OP_TYPE_EXCEPT", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "SET_OP_TYPE_UNSPECIFIED" => Some(Self::Unspecified), + "SET_OP_TYPE_INTERSECT" => Some(Self::Intersect), + "SET_OP_TYPE_UNION" => Some(Self::Union), + "SET_OP_TYPE_EXCEPT" => Some(Self::Except), + _ => None, + } + } + } +} +/// Relation of type \[[Limit]\] that is used to `limit` rows from the input relation. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Limit { + /// (Required) Input relation for a Limit. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) the limit. + #[prost(int32, tag = "2")] + pub limit: i32, +} +/// Relation of type \[[Offset]\] that is used to read rows staring from the `offset` on +/// the input relation. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Offset { + /// (Required) Input relation for an Offset. 
+ #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) the limit. + #[prost(int32, tag = "2")] + pub offset: i32, +} +/// Relation of type \[[Tail]\] that is used to fetch `limit` rows from the last of the input relation. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Tail { + /// (Required) Input relation for an Tail. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) the limit. + #[prost(int32, tag = "2")] + pub limit: i32, +} +/// Relation of type \[[Aggregate]\]. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Aggregate { + /// (Required) Input relation for a RelationalGroupedDataset. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) How the RelationalGroupedDataset was built. + #[prost(enumeration = "aggregate::GroupType", tag = "2")] + pub group_type: i32, + /// (Required) Expressions for grouping keys + #[prost(message, repeated, tag = "3")] + pub grouping_expressions: ::prost::alloc::vec::Vec, + /// (Required) List of values that will be translated to columns in the output DataFrame. + #[prost(message, repeated, tag = "4")] + pub aggregate_expressions: ::prost::alloc::vec::Vec, + /// (Optional) Pivots a column of the current `DataFrame` and performs the specified aggregation. + #[prost(message, optional, tag = "5")] + pub pivot: ::core::option::Option, + /// (Optional) List of values that will be translated to columns in the output DataFrame. + #[prost(message, repeated, tag = "6")] + pub grouping_sets: ::prost::alloc::vec::Vec, +} +/// Nested message and enum types in `Aggregate`. +pub mod aggregate { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Pivot { + /// (Required) The column to pivot + #[prost(message, optional, tag = "1")] + pub col: ::core::option::Option, + /// (Optional) List of values that will be translated to columns in the output DataFrame. + /// + /// Note that if it is empty, the server side will immediately trigger a job to collect + /// the distinct values of the column. + #[prost(message, repeated, tag = "2")] + pub values: ::prost::alloc::vec::Vec, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct GroupingSets { + /// (Required) Individual grouping set + #[prost(message, repeated, tag = "1")] + pub grouping_set: ::prost::alloc::vec::Vec, + } + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] + #[repr(i32)] + pub enum GroupType { + Unspecified = 0, + Groupby = 1, + Rollup = 2, + Cube = 3, + Pivot = 4, + GroupingSets = 5, + } + impl GroupType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "GROUP_TYPE_UNSPECIFIED", + Self::Groupby => "GROUP_TYPE_GROUPBY", + Self::Rollup => "GROUP_TYPE_ROLLUP", + Self::Cube => "GROUP_TYPE_CUBE", + Self::Pivot => "GROUP_TYPE_PIVOT", + Self::GroupingSets => "GROUP_TYPE_GROUPING_SETS", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. 
+ pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "GROUP_TYPE_UNSPECIFIED" => Some(Self::Unspecified), + "GROUP_TYPE_GROUPBY" => Some(Self::Groupby), + "GROUP_TYPE_ROLLUP" => Some(Self::Rollup), + "GROUP_TYPE_CUBE" => Some(Self::Cube), + "GROUP_TYPE_PIVOT" => Some(Self::Pivot), + "GROUP_TYPE_GROUPING_SETS" => Some(Self::GroupingSets), + _ => None, + } + } + } +} +/// Relation of type \[[Sort]\]. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Sort { + /// (Required) Input relation for a Sort. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The ordering expressions + #[prost(message, repeated, tag = "2")] + pub order: ::prost::alloc::vec::Vec, + /// (Optional) if this is a global sort. + #[prost(bool, optional, tag = "3")] + pub is_global: ::core::option::Option, +} +/// Drop specified columns. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Drop { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) columns to drop. + #[prost(message, repeated, tag = "2")] + pub columns: ::prost::alloc::vec::Vec, + /// (Optional) names of columns to drop. + #[prost(string, repeated, tag = "3")] + pub column_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +/// Relation of type \[[Deduplicate]\] which have duplicate rows removed, could consider either only +/// the subset of columns or all the columns. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Deduplicate { + /// (Required) Input relation for a Deduplicate. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) Deduplicate based on a list of column names. + /// + /// This field does not co-use with `all_columns_as_keys`. + #[prost(string, repeated, tag = "2")] + pub column_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// (Optional) Deduplicate based on all the columns of the input relation. + /// + /// This field does not co-use with `column_names`. + #[prost(bool, optional, tag = "3")] + pub all_columns_as_keys: ::core::option::Option, + /// (Optional) Deduplicate within the time range of watermark. + #[prost(bool, optional, tag = "4")] + pub within_watermark: ::core::option::Option, +} +/// A relation that does not need to be qualified by name. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LocalRelation { + /// (Optional) Local collection data serialized into Arrow IPC streaming format which contains + /// the schema of the data. + #[prost(bytes = "vec", optional, tag = "1")] + pub data: ::core::option::Option<::prost::alloc::vec::Vec>, + /// (Optional) The schema of local data. + /// It should be either a DDL-formatted type string or a JSON string. + /// + /// The server side will update the column names and data types according to this schema. + /// If the 'data' is not provided, then this schema will be required. + #[prost(string, optional, tag = "2")] + pub schema: ::core::option::Option<::prost::alloc::string::String>, +} +/// A local relation that has been cached already. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CachedLocalRelation { + /// (Required) A sha-256 hash of the serialized local relation in proto, see LocalRelation. 
+ #[prost(string, tag = "3")] + pub hash: ::prost::alloc::string::String, +} +/// Represents a remote relation that has been cached on server. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CachedRemoteRelation { + /// (Required) ID of the remote related (assigned by the service). + #[prost(string, tag = "1")] + pub relation_id: ::prost::alloc::string::String, +} +/// Relation of type \[[Sample]\] that samples a fraction of the dataset. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Sample { + /// (Required) Input relation for a Sample. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) lower bound. + #[prost(double, tag = "2")] + pub lower_bound: f64, + /// (Required) upper bound. + #[prost(double, tag = "3")] + pub upper_bound: f64, + /// (Optional) Whether to sample with replacement. + #[prost(bool, optional, tag = "4")] + pub with_replacement: ::core::option::Option, + /// (Required) The random seed. + /// This field is required to avoid generating mutable dataframes (see SPARK-48184 for details), + /// however, still keep it 'optional' here for backward compatibility. + #[prost(int64, optional, tag = "5")] + pub seed: ::core::option::Option, + /// (Required) Explicitly sort the underlying plan to make the ordering deterministic or cache it. + /// This flag is true when invoking `dataframe.randomSplit` to randomly splits DataFrame with the + /// provided weights. Otherwise, it is false. + #[prost(bool, tag = "6")] + pub deterministic_order: bool, +} +/// Relation of type \[[Range]\] that generates a sequence of integers. +#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct Range { + /// (Optional) Default value = 0 + #[prost(int64, optional, tag = "1")] + pub start: ::core::option::Option, + /// (Required) + #[prost(int64, tag = "2")] + pub end: i64, + /// (Required) + #[prost(int64, tag = "3")] + pub step: i64, + /// Optional. Default value is assigned by 1) SQL conf "spark.sql.leafNodeDefaultParallelism" if + /// it is set, or 2) spark default parallelism. + #[prost(int32, optional, tag = "4")] + pub num_partitions: ::core::option::Option, +} +/// Relation alias. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SubqueryAlias { + /// (Required) The input relation of SubqueryAlias. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The alias. + #[prost(string, tag = "2")] + pub alias: ::prost::alloc::string::String, + /// (Optional) Qualifier of the alias. + #[prost(string, repeated, tag = "3")] + pub qualifier: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +/// Relation repartition. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Repartition { + /// (Required) The input relation of Repartition. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Must be positive. + #[prost(int32, tag = "2")] + pub num_partitions: i32, + /// (Optional) Default value is false. + #[prost(bool, optional, tag = "3")] + pub shuffle: ::core::option::Option, +} +/// Compose the string representing rows for output. +/// It will invoke 'Dataset.showString' to compute the results. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ShowString { + /// (Required) The input relation. 
+ #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Number of rows to show. + #[prost(int32, tag = "2")] + pub num_rows: i32, + /// (Required) If set to more than 0, truncates strings to + /// `truncate` characters and all cells will be aligned right. + #[prost(int32, tag = "3")] + pub truncate: i32, + /// (Required) If set to true, prints output rows vertically (one line per column value). + #[prost(bool, tag = "4")] + pub vertical: bool, +} +/// Compose the string representing rows for output. +/// It will invoke 'Dataset.htmlString' to compute the results. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct HtmlString { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Number of rows to show. + #[prost(int32, tag = "2")] + pub num_rows: i32, + /// (Required) If set to more than 0, truncates strings to + /// `truncate` characters and all cells will be aligned right. + #[prost(int32, tag = "3")] + pub truncate: i32, +} +/// Computes specified statistics for numeric and string columns. +/// It will invoke 'Dataset.summary' (same as 'StatFunctions.summary') +/// to compute the results. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StatSummary { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) Statistics from to be computed. + /// + /// Available statistics are: + /// count + /// mean + /// stddev + /// min + /// max + /// arbitrary approximate percentiles specified as a percentage (e.g. 75%) + /// count_distinct + /// approx_count_distinct + /// + /// If no statistics are given, this function computes 'count', 'mean', 'stddev', 'min', + /// 'approximate quartiles' (percentiles at 25%, 50%, and 75%), and 'max'. + #[prost(string, repeated, tag = "2")] + pub statistics: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +/// Computes basic statistics for numeric and string columns, including count, mean, stddev, min, +/// and max. If no columns are given, this function computes statistics for all numerical or +/// string columns. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StatDescribe { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) Columns to compute statistics on. + #[prost(string, repeated, tag = "2")] + pub cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +/// Computes a pair-wise frequency table of the given columns. Also known as a contingency table. +/// It will invoke 'Dataset.stat.crosstab' (same as 'StatFunctions.crossTabulate') +/// to compute the results. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StatCrosstab { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The name of the first column. + /// + /// Distinct items will make the first item of each row. + #[prost(string, tag = "2")] + pub col1: ::prost::alloc::string::String, + /// (Required) The name of the second column. + /// + /// Distinct items will make the column names of the DataFrame. 
+ #[prost(string, tag = "3")] + pub col2: ::prost::alloc::string::String, +} +/// Calculate the sample covariance of two numerical columns of a DataFrame. +/// It will invoke 'Dataset.stat.cov' (same as 'StatFunctions.calculateCov') to compute the results. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StatCov { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The name of the first column. + #[prost(string, tag = "2")] + pub col1: ::prost::alloc::string::String, + /// (Required) The name of the second column. + #[prost(string, tag = "3")] + pub col2: ::prost::alloc::string::String, +} +/// Calculates the correlation of two columns of a DataFrame. Currently only supports the Pearson +/// Correlation Coefficient. It will invoke 'Dataset.stat.corr' (same as +/// 'StatFunctions.pearsonCorrelation') to compute the results. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StatCorr { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The name of the first column. + #[prost(string, tag = "2")] + pub col1: ::prost::alloc::string::String, + /// (Required) The name of the second column. + #[prost(string, tag = "3")] + pub col2: ::prost::alloc::string::String, + /// (Optional) Default value is 'pearson'. + /// + /// Currently only supports the Pearson Correlation Coefficient. + #[prost(string, optional, tag = "4")] + pub method: ::core::option::Option<::prost::alloc::string::String>, +} +/// Calculates the approximate quantiles of numerical columns of a DataFrame. +/// It will invoke 'Dataset.stat.approxQuantile' (same as 'StatFunctions.approxQuantile') +/// to compute the results. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StatApproxQuantile { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The names of the numerical columns. + #[prost(string, repeated, tag = "2")] + pub cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// (Required) A list of quantile probabilities. + /// + /// Each number must belong to \[0, 1\]. + /// For example 0 is the minimum, 0.5 is the median, 1 is the maximum. + #[prost(double, repeated, tag = "3")] + pub probabilities: ::prost::alloc::vec::Vec, + /// (Required) The relative target precision to achieve (greater than or equal to 0). + /// + /// If set to zero, the exact quantiles are computed, which could be very expensive. + /// Note that values greater than 1 are accepted but give the same result as 1. + #[prost(double, tag = "4")] + pub relative_error: f64, +} +/// Finding frequent items for columns, possibly with false positives. +/// It will invoke 'Dataset.stat.freqItems' (same as 'StatFunctions.freqItems') +/// to compute the results. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StatFreqItems { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The names of the columns to search frequent items in. + #[prost(string, repeated, tag = "2")] + pub cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// (Optional) The minimum frequency for an item to be considered `frequent`. + /// Should be greater than 1e-4. 
+ #[prost(double, optional, tag = "3")] + pub support: ::core::option::Option, +} +/// Returns a stratified sample without replacement based on the fraction +/// given on each stratum. +/// It will invoke 'Dataset.stat.freqItems' (same as 'StatFunctions.freqItems') +/// to compute the results. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StatSampleBy { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The column that defines strata. + #[prost(message, optional, tag = "2")] + pub col: ::core::option::Option, + /// (Required) Sampling fraction for each stratum. + /// + /// If a stratum is not specified, we treat its fraction as zero. + #[prost(message, repeated, tag = "3")] + pub fractions: ::prost::alloc::vec::Vec, + /// (Required) The random seed. + /// This field is required to avoid generating mutable dataframes (see SPARK-48184 for details), + /// however, still keep it 'optional' here for backward compatibility. + #[prost(int64, optional, tag = "5")] + pub seed: ::core::option::Option, +} +/// Nested message and enum types in `StatSampleBy`. +pub mod stat_sample_by { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Fraction { + /// (Required) The stratum. + #[prost(message, optional, tag = "1")] + pub stratum: ::core::option::Option, + /// (Required) The fraction value. Must be in \[0, 1\]. + #[prost(double, tag = "2")] + pub fraction: f64, + } +} +/// Replaces null values. +/// It will invoke 'Dataset.na.fill' (same as 'DataFrameNaFunctions.fill') to compute the results. +/// Following 3 parameter combinations are supported: +/// 1, 'values' only contains 1 item, 'cols' is empty: +/// replaces null values in all type-compatible columns. +/// 2, 'values' only contains 1 item, 'cols' is not empty: +/// replaces null values in specified columns. +/// 3, 'values' contains more than 1 items, then 'cols' is required to have the same length: +/// replaces each specified column with corresponding value. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct NaFill { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) Optional list of column names to consider. + #[prost(string, repeated, tag = "2")] + pub cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// (Required) Values to replace null values with. + /// + /// Should contain at least 1 item. + /// Only 4 data types are supported now: bool, long, double, string + #[prost(message, repeated, tag = "3")] + pub values: ::prost::alloc::vec::Vec, +} +/// Drop rows containing null values. +/// It will invoke 'Dataset.na.drop' (same as 'DataFrameNaFunctions.drop') to compute the results. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct NaDrop { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) Optional list of column names to consider. + /// + /// When it is empty, all the columns in the input relation will be considered. + #[prost(string, repeated, tag = "2")] + pub cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// (Optional) The minimum number of non-null and non-NaN values required to keep. 
+ /// + /// When not set, it is equivalent to the number of considered columns, which means + /// a row will be kept only if all columns are non-null. + /// + /// 'how' options ('all', 'any') can be easily converted to this field: + /// - 'all' -> set 'min_non_nulls' 1; + /// - 'any' -> keep 'min_non_nulls' unset; + #[prost(int32, optional, tag = "3")] + pub min_non_nulls: ::core::option::Option, +} +/// Replaces old values with the corresponding values. +/// It will invoke 'Dataset.na.replace' (same as 'DataFrameNaFunctions.replace') +/// to compute the results. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct NaReplace { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) List of column names to consider. + /// + /// When it is empty, all the type-compatible columns in the input relation will be considered. + #[prost(string, repeated, tag = "2")] + pub cols: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// (Optional) The value replacement mapping. + #[prost(message, repeated, tag = "3")] + pub replacements: ::prost::alloc::vec::Vec, +} +/// Nested message and enum types in `NAReplace`. +pub mod na_replace { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Replacement { + /// (Required) The old value. + /// + /// Only 4 data types are supported now: null, bool, double, string. + #[prost(message, optional, tag = "1")] + pub old_value: ::core::option::Option, + /// (Required) The new value. + /// + /// Should be of the same data type with the old value. + #[prost(message, optional, tag = "2")] + pub new_value: ::core::option::Option, + } +} +/// Rename columns on the input relation by the same length of names. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ToDf { + /// (Required) The input relation of RenameColumnsBySameLengthNames. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) + /// + /// The number of columns of the input relation must be equal to the length + /// of this field. If this is not true, an exception will be returned. + #[prost(string, repeated, tag = "2")] + pub column_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +/// Rename columns on the input relation by a map with name to name mapping. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WithColumnsRenamed { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) + /// + /// Renaming column names of input relation from A to B where A is the map key + /// and B is the map value. This is a no-op if schema doesn't contain any A. It + /// does not require that all input relation column names to present as keys. + /// duplicated B are not allowed. + #[prost(map = "string, string", tag = "2")] + pub rename_columns_map: + ::std::collections::HashMap<::prost::alloc::string::String, ::prost::alloc::string::String>, + #[prost(message, repeated, tag = "3")] + pub renames: ::prost::alloc::vec::Vec, +} +/// Nested message and enum types in `WithColumnsRenamed`. +pub mod with_columns_renamed { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Rename { + /// (Required) The existing column name. + #[prost(string, tag = "1")] + pub col_name: ::prost::alloc::string::String, + /// (Required) The new column name. 
+ #[prost(string, tag = "2")] + pub new_col_name: ::prost::alloc::string::String, + } +} +/// Adding columns or replacing the existing columns that have the same names. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WithColumns { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) + /// + /// Given a column name, apply the corresponding expression on the column. If column + /// name exists in the input relation, then replace the column. If the column name + /// does not exist in the input relation, then adds it as a new column. + /// + /// Only one name part is expected from each Expression.Alias. + /// + /// An exception is thrown when duplicated names are present in the mapping. + #[prost(message, repeated, tag = "2")] + pub aliases: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WithWatermark { + /// (Required) The input relation + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Name of the column containing event time. + #[prost(string, tag = "2")] + pub event_time: ::prost::alloc::string::String, + /// (Required) + #[prost(string, tag = "3")] + pub delay_threshold: ::prost::alloc::string::String, +} +/// Specify a hint over a relation. Hint should have a name and optional parameters. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Hint { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Hint name. + /// + /// Supported Join hints include BROADCAST, MERGE, SHUFFLE_HASH, SHUFFLE_REPLICATE_NL. + /// + /// Supported partitioning hints include COALESCE, REPARTITION, REPARTITION_BY_RANGE. + #[prost(string, tag = "2")] + pub name: ::prost::alloc::string::String, + /// (Optional) Hint parameters. + #[prost(message, repeated, tag = "3")] + pub parameters: ::prost::alloc::vec::Vec, +} +/// Unpivot a DataFrame from wide format to long format, optionally leaving identifier columns set. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Unpivot { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Id columns. + #[prost(message, repeated, tag = "2")] + pub ids: ::prost::alloc::vec::Vec, + /// (Optional) Value columns to unpivot. + #[prost(message, optional, tag = "3")] + pub values: ::core::option::Option, + /// (Required) Name of the variable column. + #[prost(string, tag = "4")] + pub variable_column_name: ::prost::alloc::string::String, + /// (Required) Name of the value column. + #[prost(string, tag = "5")] + pub value_column_name: ::prost::alloc::string::String, +} +/// Nested message and enum types in `Unpivot`. +pub mod unpivot { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Values { + #[prost(message, repeated, tag = "1")] + pub values: ::prost::alloc::vec::Vec, + } +} +/// Transpose a DataFrame, switching rows to columns. +/// Transforms the DataFrame such that the values in the specified index column +/// become the new columns of the DataFrame. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Transpose { + /// (Required) The input relation. 
+ #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) A list of columns that will be treated as the indices. + /// Only single column is supported now. + #[prost(message, repeated, tag = "2")] + pub index_columns: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ToSchema { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The user provided schema. + /// + /// The Sever side will update the dataframe with this schema. + #[prost(message, optional, tag = "2")] + pub schema: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RepartitionByExpression { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The partitioning expressions. + #[prost(message, repeated, tag = "2")] + pub partition_exprs: ::prost::alloc::vec::Vec, + /// (Optional) number of partitions, must be positive. + #[prost(int32, optional, tag = "3")] + pub num_partitions: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct MapPartitions { + /// (Required) Input relation for a mapPartitions-equivalent API: mapInPandas, mapInArrow. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Input user-defined function. + #[prost(message, optional, tag = "2")] + pub func: ::core::option::Option, + /// (Optional) Whether to use barrier mode execution or not. + #[prost(bool, optional, tag = "3")] + pub is_barrier: ::core::option::Option, + /// (Optional) ResourceProfile id used for the stage level scheduling. + #[prost(int32, optional, tag = "4")] + pub profile_id: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GroupMap { + /// (Required) Input relation for Group Map API: apply, applyInPandas. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Expressions for grouping keys. + #[prost(message, repeated, tag = "2")] + pub grouping_expressions: ::prost::alloc::vec::Vec, + /// (Required) Input user-defined function. + #[prost(message, optional, tag = "3")] + pub func: ::core::option::Option, + /// (Optional) Expressions for sorting. Only used by Scala Sorted Group Map API. + #[prost(message, repeated, tag = "4")] + pub sorting_expressions: ::prost::alloc::vec::Vec, + /// Below fields are only used by (Flat)MapGroupsWithState + /// (Optional) Input relation for initial State. + #[prost(message, optional, boxed, tag = "5")] + pub initial_input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Optional) Expressions for grouping keys of the initial state input relation. + #[prost(message, repeated, tag = "6")] + pub initial_grouping_expressions: ::prost::alloc::vec::Vec, + /// (Optional) True if MapGroupsWithState, false if FlatMapGroupsWithState. + #[prost(bool, optional, tag = "7")] + pub is_map_groups_with_state: ::core::option::Option, + /// (Optional) The output mode of the function. + #[prost(string, optional, tag = "8")] + pub output_mode: ::core::option::Option<::prost::alloc::string::String>, + /// (Optional) Timeout configuration for groups that do not receive data for a while. 
+ #[prost(string, optional, tag = "9")] + pub timeout_conf: ::core::option::Option<::prost::alloc::string::String>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CoGroupMap { + /// (Required) One input relation for CoGroup Map API - applyInPandas. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// Expressions for grouping keys of the first input relation. + #[prost(message, repeated, tag = "2")] + pub input_grouping_expressions: ::prost::alloc::vec::Vec, + /// (Required) The other input relation. + #[prost(message, optional, boxed, tag = "3")] + pub other: ::core::option::Option<::prost::alloc::boxed::Box>, + /// Expressions for grouping keys of the other input relation. + #[prost(message, repeated, tag = "4")] + pub other_grouping_expressions: ::prost::alloc::vec::Vec, + /// (Required) Input user-defined function. + #[prost(message, optional, tag = "5")] + pub func: ::core::option::Option, + /// (Optional) Expressions for sorting. Only used by Scala Sorted CoGroup Map API. + #[prost(message, repeated, tag = "6")] + pub input_sorting_expressions: ::prost::alloc::vec::Vec, + /// (Optional) Expressions for sorting. Only used by Scala Sorted CoGroup Map API. + #[prost(message, repeated, tag = "7")] + pub other_sorting_expressions: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ApplyInPandasWithState { + /// (Required) Input relation for applyInPandasWithState. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Expressions for grouping keys. + #[prost(message, repeated, tag = "2")] + pub grouping_expressions: ::prost::alloc::vec::Vec, + /// (Required) Input user-defined function. + #[prost(message, optional, tag = "3")] + pub func: ::core::option::Option, + /// (Required) Schema for the output DataFrame. + #[prost(string, tag = "4")] + pub output_schema: ::prost::alloc::string::String, + /// (Required) Schema for the state. + #[prost(string, tag = "5")] + pub state_schema: ::prost::alloc::string::String, + /// (Required) The output mode of the function. + #[prost(string, tag = "6")] + pub output_mode: ::prost::alloc::string::String, + /// (Required) Timeout configuration for groups that do not receive data for a while. + #[prost(string, tag = "7")] + pub timeout_conf: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CommonInlineUserDefinedTableFunction { + /// (Required) Name of the user-defined table function. + #[prost(string, tag = "1")] + pub function_name: ::prost::alloc::string::String, + /// (Optional) Whether the user-defined table function is deterministic. + #[prost(bool, tag = "2")] + pub deterministic: bool, + /// (Optional) Function input arguments. Empty arguments are allowed. + #[prost(message, repeated, tag = "3")] + pub arguments: ::prost::alloc::vec::Vec, + /// (Required) Type of the user-defined table function. + #[prost( + oneof = "common_inline_user_defined_table_function::Function", + tags = "4" + )] + pub function: ::core::option::Option, +} +/// Nested message and enum types in `CommonInlineUserDefinedTableFunction`. +pub mod common_inline_user_defined_table_function { + /// (Required) Type of the user-defined table function. 
+ #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Function { + #[prost(message, tag = "4")] + PythonUdtf(super::PythonUdtf), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PythonUdtf { + /// (Optional) Return type of the Python UDTF. + #[prost(message, optional, tag = "1")] + pub return_type: ::core::option::Option, + /// (Required) EvalType of the Python UDTF. + #[prost(int32, tag = "2")] + pub eval_type: i32, + /// (Required) The encoded commands of the Python UDTF. + #[prost(bytes = "vec", tag = "3")] + pub command: ::prost::alloc::vec::Vec, + /// (Required) Python version being used in the client. + #[prost(string, tag = "4")] + pub python_ver: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CommonInlineUserDefinedDataSource { + /// (Required) Name of the data source. + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + /// (Required) The data source type. + #[prost( + oneof = "common_inline_user_defined_data_source::DataSource", + tags = "2" + )] + pub data_source: ::core::option::Option, +} +/// Nested message and enum types in `CommonInlineUserDefinedDataSource`. +pub mod common_inline_user_defined_data_source { + /// (Required) The data source type. + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum DataSource { + #[prost(message, tag = "2")] + PythonDataSource(super::PythonDataSource), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PythonDataSource { + /// (Required) The encoded commands of the Python data source. + #[prost(bytes = "vec", tag = "1")] + pub command: ::prost::alloc::vec::Vec, + /// (Required) Python version being used in the client. + #[prost(string, tag = "2")] + pub python_ver: ::prost::alloc::string::String, +} +/// Collect arbitrary (named) metrics from a dataset. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CollectMetrics { + /// (Required) The input relation. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Name of the metrics. + #[prost(string, tag = "2")] + pub name: ::prost::alloc::string::String, + /// (Required) The metric sequence. + #[prost(message, repeated, tag = "3")] + pub metrics: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Parse { + /// (Required) Input relation to Parse. The input is expected to have single text column. + #[prost(message, optional, boxed, tag = "1")] + pub input: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) The expected format of the text. + #[prost(enumeration = "parse::ParseFormat", tag = "2")] + pub format: i32, + /// (Optional) DataType representing the schema. If not set, Spark will infer the schema. + #[prost(message, optional, tag = "3")] + pub schema: ::core::option::Option, + /// Options for the csv/json parser. The map key is case insensitive. + #[prost(map = "string, string", tag = "4")] + pub options: + ::std::collections::HashMap<::prost::alloc::string::String, ::prost::alloc::string::String>, +} +/// Nested message and enum types in `Parse`. +pub mod parse { + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] + #[repr(i32)] + pub enum ParseFormat { + Unspecified = 0, + Csv = 1, + Json = 2, + } + impl ParseFormat { + /// String value of the enum field names used in the ProtoBuf definition. 
+ /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "PARSE_FORMAT_UNSPECIFIED", + Self::Csv => "PARSE_FORMAT_CSV", + Self::Json => "PARSE_FORMAT_JSON", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "PARSE_FORMAT_UNSPECIFIED" => Some(Self::Unspecified), + "PARSE_FORMAT_CSV" => Some(Self::Csv), + "PARSE_FORMAT_JSON" => Some(Self::Json), + _ => None, + } + } + } +} +/// Relation of type \[[AsOfJoin]\]. +/// +/// `left` and `right` must be present. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AsOfJoin { + /// (Required) Left input relation for a Join. + #[prost(message, optional, boxed, tag = "1")] + pub left: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Right input relation for a Join. + #[prost(message, optional, boxed, tag = "2")] + pub right: ::core::option::Option<::prost::alloc::boxed::Box>, + /// (Required) Field to join on in left DataFrame + #[prost(message, optional, tag = "3")] + pub left_as_of: ::core::option::Option, + /// (Required) Field to join on in right DataFrame + #[prost(message, optional, tag = "4")] + pub right_as_of: ::core::option::Option, + /// (Optional) The join condition. Could be unset when `using_columns` is utilized. + /// + /// This field does not co-exist with using_columns. + #[prost(message, optional, tag = "5")] + pub join_expr: ::core::option::Option, + /// Optional. using_columns provides a list of columns that should present on both sides of + /// the join inputs that this Join will join on. For example A JOIN B USING col_name is + /// equivalent to A JOIN B on A.col_name = B.col_name. + /// + /// This field does not co-exist with join_condition. + #[prost(string, repeated, tag = "6")] + pub using_columns: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// (Required) The join type. + #[prost(string, tag = "7")] + pub join_type: ::prost::alloc::string::String, + /// (Optional) The asof tolerance within this range. + #[prost(message, optional, tag = "8")] + pub tolerance: ::core::option::Option, + /// (Required) Whether allow matching with the same value or not. + #[prost(bool, tag = "9")] + pub allow_exact_matches: bool, + /// (Required) Whether to search for prior, subsequent, or closest matches. + #[prost(string, tag = "10")] + pub direction: ::prost::alloc::string::String, +} +/// A \[[Command]\] is an operation that is executed by the server that does not directly consume or +/// produce a relational result. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Command { + #[prost( + oneof = "command::CommandType", + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 999" + )] + pub command_type: ::core::option::Option, +} +/// Nested message and enum types in `Command`. 
+pub mod command { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum CommandType { + #[prost(message, tag = "1")] + RegisterFunction(super::CommonInlineUserDefinedFunction), + #[prost(message, tag = "2")] + WriteOperation(super::WriteOperation), + #[prost(message, tag = "3")] + CreateDataframeView(super::CreateDataFrameViewCommand), + #[prost(message, tag = "4")] + WriteOperationV2(super::WriteOperationV2), + #[prost(message, tag = "5")] + SqlCommand(super::SqlCommand), + #[prost(message, tag = "6")] + WriteStreamOperationStart(super::WriteStreamOperationStart), + #[prost(message, tag = "7")] + StreamingQueryCommand(super::StreamingQueryCommand), + #[prost(message, tag = "8")] + GetResourcesCommand(super::GetResourcesCommand), + #[prost(message, tag = "9")] + StreamingQueryManagerCommand(super::StreamingQueryManagerCommand), + #[prost(message, tag = "10")] + RegisterTableFunction(super::CommonInlineUserDefinedTableFunction), + #[prost(message, tag = "11")] + StreamingQueryListenerBusCommand(super::StreamingQueryListenerBusCommand), + #[prost(message, tag = "12")] + RegisterDataSource(super::CommonInlineUserDefinedDataSource), + #[prost(message, tag = "13")] + CreateResourceProfileCommand(super::CreateResourceProfileCommand), + #[prost(message, tag = "14")] + CheckpointCommand(super::CheckpointCommand), + #[prost(message, tag = "15")] + RemoveCachedRemoteRelationCommand(super::RemoveCachedRemoteRelationCommand), + #[prost(message, tag = "16")] + MergeIntoTableCommand(super::MergeIntoTableCommand), + /// This field is used to mark extensions to the protocol. When plugins generate arbitrary + /// Commands they can add them here. During the planning the correct resolution is done. + #[prost(message, tag = "999")] + Extension(::prost_types::Any), + } +} +/// A SQL Command is used to trigger the eager evaluation of SQL commands in Spark. +/// +/// When the SQL provide as part of the message is a command it will be immediately evaluated +/// and the result will be collected and returned as part of a LocalRelation. If the result is +/// not a command, the operation will simply return a SQL Relation. This allows the client to be +/// almost oblivious to the server-side behavior. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SqlCommand { + /// (Required) SQL Query. + #[deprecated] + #[prost(string, tag = "1")] + pub sql: ::prost::alloc::string::String, + /// (Optional) A map of parameter names to literal expressions. + #[prost(map = "string, message", tag = "2")] + pub args: ::std::collections::HashMap<::prost::alloc::string::String, expression::Literal>, + /// (Optional) A sequence of literal expressions for positional parameters in the SQL query text. + #[deprecated] + #[prost(message, repeated, tag = "3")] + pub pos_args: ::prost::alloc::vec::Vec, + /// (Optional) A map of parameter names to expressions. + /// It cannot coexist with `pos_arguments`. + #[prost(map = "string, message", tag = "4")] + pub named_arguments: ::std::collections::HashMap<::prost::alloc::string::String, Expression>, + /// (Optional) A sequence of expressions for positional parameters in the SQL query text. + /// It cannot coexist with `named_arguments`. + #[deprecated] + #[prost(message, repeated, tag = "5")] + pub pos_arguments: ::prost::alloc::vec::Vec, + /// (Optional) The relation that this SQL command will be built on. + #[prost(message, optional, tag = "6")] + pub input: ::core::option::Option, +} +/// A command that can create DataFrame global temp view or local temp view. 
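A minimal usage sketch (hand-written illustration, not part of the generated bindings): wrapping a `SqlCommand` in the `Command` oneof above. It assumes the elided type parameter on `command_type` is `command::CommandType`, and it uses the deprecated `sql` field only for brevity; prost's `Message` derive supplies the `Default` used for the remaining fields.

```rust
fn sql_command(query: &str) -> Command {
    // The `sql` field is deprecated in favour of `input`; allow the warning here.
    #[allow(deprecated)]
    let sql = SqlCommand {
        sql: query.to_string(),
        ..Default::default()
    };
    Command {
        command_type: Some(command::CommandType::SqlCommand(sql)),
    }
}
```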
+#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CreateDataFrameViewCommand { + /// (Required) The relation that this view will be built on. + #[prost(message, optional, tag = "1")] + pub input: ::core::option::Option, + /// (Required) View name. + #[prost(string, tag = "2")] + pub name: ::prost::alloc::string::String, + /// (Required) Whether this is global temp view or local temp view. + #[prost(bool, tag = "3")] + pub is_global: bool, + /// (Required) + /// + /// If true, and if the view already exists, updates it; if false, and if the view + /// already exists, throws exception. + #[prost(bool, tag = "4")] + pub replace: bool, +} +/// As writes are not directly handled during analysis and planning, they are modeled as commands. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WriteOperation { + /// (Required) The output of the `input` relation will be persisted according to the options. + #[prost(message, optional, tag = "1")] + pub input: ::core::option::Option, + /// (Optional) Format value according to the Spark documentation. Examples are: text, parquet, delta. + #[prost(string, optional, tag = "2")] + pub source: ::core::option::Option<::prost::alloc::string::String>, + /// (Required) the save mode. + #[prost(enumeration = "write_operation::SaveMode", tag = "5")] + pub mode: i32, + /// (Optional) List of columns to sort the output by. + #[prost(string, repeated, tag = "6")] + pub sort_column_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// (Optional) List of columns for partitioning. + #[prost(string, repeated, tag = "7")] + pub partitioning_columns: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// (Optional) Bucketing specification. Bucketing must set the number of buckets and the columns + /// to bucket by. + #[prost(message, optional, tag = "8")] + pub bucket_by: ::core::option::Option, + /// (Optional) A list of configuration options. + #[prost(map = "string, string", tag = "9")] + pub options: + ::std::collections::HashMap<::prost::alloc::string::String, ::prost::alloc::string::String>, + /// (Optional) Columns used for clustering the table. + #[prost(string, repeated, tag = "10")] + pub clustering_columns: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// (Optional) + /// + /// The destination of the write operation can be either a path or a table. + /// If the destination is neither a path nor a table, such as jdbc and noop, + /// the `save_type` should not be set. + #[prost(oneof = "write_operation::SaveType", tags = "3, 4")] + pub save_type: ::core::option::Option, +} +/// Nested message and enum types in `WriteOperation`. +pub mod write_operation { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct SaveTable { + /// (Required) The table name. + #[prost(string, tag = "1")] + pub table_name: ::prost::alloc::string::String, + /// (Required) The method to be called to write to the table. + #[prost(enumeration = "save_table::TableSaveMethod", tag = "2")] + pub save_method: i32, + } + /// Nested message and enum types in `SaveTable`. + pub mod save_table { + #[derive( + Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, + )] + #[repr(i32)] + pub enum TableSaveMethod { + Unspecified = 0, + SaveAsTable = 1, + InsertInto = 2, + } + impl TableSaveMethod { + /// String value of the enum field names used in the ProtoBuf definition. 
+ /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "TABLE_SAVE_METHOD_UNSPECIFIED", + Self::SaveAsTable => "TABLE_SAVE_METHOD_SAVE_AS_TABLE", + Self::InsertInto => "TABLE_SAVE_METHOD_INSERT_INTO", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "TABLE_SAVE_METHOD_UNSPECIFIED" => Some(Self::Unspecified), + "TABLE_SAVE_METHOD_SAVE_AS_TABLE" => Some(Self::SaveAsTable), + "TABLE_SAVE_METHOD_INSERT_INTO" => Some(Self::InsertInto), + _ => None, + } + } + } + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct BucketBy { + #[prost(string, repeated, tag = "1")] + pub bucket_column_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(int32, tag = "2")] + pub num_buckets: i32, + } + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] + #[repr(i32)] + pub enum SaveMode { + Unspecified = 0, + Append = 1, + Overwrite = 2, + ErrorIfExists = 3, + Ignore = 4, + } + impl SaveMode { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "SAVE_MODE_UNSPECIFIED", + Self::Append => "SAVE_MODE_APPEND", + Self::Overwrite => "SAVE_MODE_OVERWRITE", + Self::ErrorIfExists => "SAVE_MODE_ERROR_IF_EXISTS", + Self::Ignore => "SAVE_MODE_IGNORE", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "SAVE_MODE_UNSPECIFIED" => Some(Self::Unspecified), + "SAVE_MODE_APPEND" => Some(Self::Append), + "SAVE_MODE_OVERWRITE" => Some(Self::Overwrite), + "SAVE_MODE_ERROR_IF_EXISTS" => Some(Self::ErrorIfExists), + "SAVE_MODE_IGNORE" => Some(Self::Ignore), + _ => None, + } + } + } + /// (Optional) + /// + /// The destination of the write operation can be either a path or a table. + /// If the destination is neither a path nor a table, such as jdbc and noop, + /// the `save_type` should not be set. + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum SaveType { + #[prost(string, tag = "3")] + Path(::prost::alloc::string::String), + #[prost(message, tag = "4")] + Table(SaveTable), + } +} +/// As writes are not directly handled during analysis and planning, they are modeled as commands. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WriteOperationV2 { + /// (Required) The output of the `input` relation will be persisted according to the options. + #[prost(message, optional, tag = "1")] + pub input: ::core::option::Option, + /// (Required) The destination of the write operation must be either a path or a table. + #[prost(string, tag = "2")] + pub table_name: ::prost::alloc::string::String, + /// (Optional) A provider for the underlying output data source. Spark's default catalog supports + /// "parquet", "json", etc. 
+ #[prost(string, optional, tag = "3")] + pub provider: ::core::option::Option<::prost::alloc::string::String>, + /// (Optional) List of columns for partitioning for output table created by `create`, + /// `createOrReplace`, or `replace` + #[prost(message, repeated, tag = "4")] + pub partitioning_columns: ::prost::alloc::vec::Vec, + /// (Optional) A list of configuration options. + #[prost(map = "string, string", tag = "5")] + pub options: + ::std::collections::HashMap<::prost::alloc::string::String, ::prost::alloc::string::String>, + /// (Optional) A list of table properties. + #[prost(map = "string, string", tag = "6")] + pub table_properties: + ::std::collections::HashMap<::prost::alloc::string::String, ::prost::alloc::string::String>, + /// (Required) Write mode. + #[prost(enumeration = "write_operation_v2::Mode", tag = "7")] + pub mode: i32, + /// (Optional) A condition for overwrite saving mode + #[prost(message, optional, tag = "8")] + pub overwrite_condition: ::core::option::Option, + /// (Optional) Columns used for clustering the table. + #[prost(string, repeated, tag = "9")] + pub clustering_columns: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +/// Nested message and enum types in `WriteOperationV2`. +pub mod write_operation_v2 { + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] + #[repr(i32)] + pub enum Mode { + Unspecified = 0, + Create = 1, + Overwrite = 2, + OverwritePartitions = 3, + Append = 4, + Replace = 5, + CreateOrReplace = 6, + } + impl Mode { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "MODE_UNSPECIFIED", + Self::Create => "MODE_CREATE", + Self::Overwrite => "MODE_OVERWRITE", + Self::OverwritePartitions => "MODE_OVERWRITE_PARTITIONS", + Self::Append => "MODE_APPEND", + Self::Replace => "MODE_REPLACE", + Self::CreateOrReplace => "MODE_CREATE_OR_REPLACE", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "MODE_UNSPECIFIED" => Some(Self::Unspecified), + "MODE_CREATE" => Some(Self::Create), + "MODE_OVERWRITE" => Some(Self::Overwrite), + "MODE_OVERWRITE_PARTITIONS" => Some(Self::OverwritePartitions), + "MODE_APPEND" => Some(Self::Append), + "MODE_REPLACE" => Some(Self::Replace), + "MODE_CREATE_OR_REPLACE" => Some(Self::CreateOrReplace), + _ => None, + } + } + } +} +/// Starts write stream operation as streaming query. Query ID and Run ID of the streaming +/// query are returned. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WriteStreamOperationStart { + /// (Required) The output of the `input` streaming relation will be written. 
+ #[prost(message, optional, tag = "1")] + pub input: ::core::option::Option, + #[prost(string, tag = "2")] + pub format: ::prost::alloc::string::String, + #[prost(map = "string, string", tag = "3")] + pub options: + ::std::collections::HashMap<::prost::alloc::string::String, ::prost::alloc::string::String>, + #[prost(string, repeated, tag = "4")] + pub partitioning_column_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(string, tag = "9")] + pub output_mode: ::prost::alloc::string::String, + #[prost(string, tag = "10")] + pub query_name: ::prost::alloc::string::String, + #[prost(message, optional, tag = "13")] + pub foreach_writer: ::core::option::Option, + #[prost(message, optional, tag = "14")] + pub foreach_batch: ::core::option::Option, + /// (Optional) Columns used for clustering the table. + #[prost(string, repeated, tag = "15")] + pub clustering_column_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(oneof = "write_stream_operation_start::Trigger", tags = "5, 6, 7, 8")] + pub trigger: ::core::option::Option, + /// The destination is optional. When set, it can be a path or a table name. + #[prost( + oneof = "write_stream_operation_start::SinkDestination", + tags = "11, 12" + )] + pub sink_destination: ::core::option::Option, +} +/// Nested message and enum types in `WriteStreamOperationStart`. +pub mod write_stream_operation_start { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Trigger { + #[prost(string, tag = "5")] + ProcessingTimeInterval(::prost::alloc::string::String), + #[prost(bool, tag = "6")] + AvailableNow(bool), + #[prost(bool, tag = "7")] + Once(bool), + #[prost(string, tag = "8")] + ContinuousCheckpointInterval(::prost::alloc::string::String), + } + /// The destination is optional. When set, it can be a path or a table name. + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum SinkDestination { + #[prost(string, tag = "11")] + Path(::prost::alloc::string::String), + #[prost(string, tag = "12")] + TableName(::prost::alloc::string::String), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StreamingForeachFunction { + #[prost(oneof = "streaming_foreach_function::Function", tags = "1, 2")] + pub function: ::core::option::Option, +} +/// Nested message and enum types in `StreamingForeachFunction`. +pub mod streaming_foreach_function { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Function { + #[prost(message, tag = "1")] + PythonFunction(super::PythonUdf), + #[prost(message, tag = "2")] + ScalaFunction(super::ScalarScalaUdf), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WriteStreamOperationStartResult { + /// (Required) Query instance. See `StreamingQueryInstanceId`. + #[prost(message, optional, tag = "1")] + pub query_id: ::core::option::Option, + /// An optional query name. + #[prost(string, tag = "2")] + pub name: ::prost::alloc::string::String, + /// Optional query started event if there is any listener registered on the client side. + #[prost(string, optional, tag = "3")] + pub query_started_event_json: ::core::option::Option<::prost::alloc::string::String>, +} +/// A tuple that uniquely identifies an instance of streaming query run. It consists of `id` that +/// persists across the streaming runs and `run_id` that changes between each run of the +/// streaming query that resumes from the checkpoint. 
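A hand-written sketch (not generated output) of starting a streaming write to a path sink with `WriteStreamOperationStart`. It assumes the elided type parameter on `input` is `Relation`, that `input_relation` was built elsewhere, and that unset fields can fall back to prost's derived `Default`.

```rust
fn start_stream(input_relation: Relation, path: &str) -> WriteStreamOperationStart {
    WriteStreamOperationStart {
        input: Some(input_relation),
        format: "parquet".to_string(),
        output_mode: "append".to_string(),
        query_name: "example_query".to_string(),
        // One of the Trigger oneof variants; here a processing-time trigger.
        trigger: Some(write_stream_operation_start::Trigger::ProcessingTimeInterval(
            "10 seconds".to_string(),
        )),
        // The sink destination oneof: a path rather than a table name.
        sink_destination: Some(write_stream_operation_start::SinkDestination::Path(
            path.to_string(),
        )),
        ..Default::default()
    }
}
```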
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct StreamingQueryInstanceId {
+    /// (Required) The unique id of this query that persists across restarts from checkpoint data.
+    /// That is, this id is generated when a query is started for the first time, and
+    /// will be the same every time it is restarted from checkpoint data.
+    #[prost(string, tag = "1")]
+    pub id: ::prost::alloc::string::String,
+    /// (Required) The unique id of this run of the query. That is, every start/restart of a query
+    /// will generate a unique run_id. Therefore, every time a query is restarted from
+    /// checkpoint, it will have the same `id` but different `run_id`s.
+    #[prost(string, tag = "2")]
+    pub run_id: ::prost::alloc::string::String,
+}
+/// Commands for a streaming query.
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct StreamingQueryCommand {
+    /// (Required) Query instance. See `StreamingQueryInstanceId`.
+    #[prost(message, optional, tag = "1")]
+    pub query_id: ::core::option::Option<StreamingQueryInstanceId>,
+    /// See documentation for the corresponding API method in StreamingQuery.
+    #[prost(
+        oneof = "streaming_query_command::Command",
+        tags = "2, 3, 4, 5, 6, 7, 8, 9"
+    )]
+    pub command: ::core::option::Option<streaming_query_command::Command>,
+}
+/// Nested message and enum types in `StreamingQueryCommand`.
+pub mod streaming_query_command {
+    #[derive(Clone, Copy, PartialEq, ::prost::Message)]
+    pub struct ExplainCommand {
+        /// TODO: Consider reusing Explain from AnalyzePlanRequest message.
+        /// We cannot do this right now since base.proto imports this file.
+        #[prost(bool, tag = "1")]
+        pub extended: bool,
+    }
+    #[derive(Clone, Copy, PartialEq, ::prost::Message)]
+    pub struct AwaitTerminationCommand {
+        #[prost(int64, optional, tag = "2")]
+        pub timeout_ms: ::core::option::Option<i64>,
+    }
+    /// See documentation for the corresponding API method in StreamingQuery.
+    #[derive(Clone, Copy, PartialEq, ::prost::Oneof)]
+    pub enum Command {
+        /// status() API.
+        #[prost(bool, tag = "2")]
+        Status(bool),
+        /// lastProgress() API.
+        #[prost(bool, tag = "3")]
+        LastProgress(bool),
+        /// recentProgress() API.
+        #[prost(bool, tag = "4")]
+        RecentProgress(bool),
+        /// stop() API. Stops the query.
+        #[prost(bool, tag = "5")]
+        Stop(bool),
+        /// processAllAvailable() API. Waits till all the available data is processed
+        #[prost(bool, tag = "6")]
+        ProcessAllAvailable(bool),
+        /// explain() API. Returns logical and physical plans.
+        #[prost(message, tag = "7")]
+        Explain(ExplainCommand),
+        /// exception() API. Returns the exception in the query if any.
+        #[prost(bool, tag = "8")]
+        Exception(bool),
+        /// awaitTermination() API. Waits for the termination of the query.
+        #[prost(message, tag = "9")]
+        AwaitTermination(AwaitTerminationCommand),
+    }
+}
+/// Response for commands on a streaming query.
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct StreamingQueryCommandResult {
+    /// (Required) Query instance id. See `StreamingQueryInstanceId`.
+    #[prost(message, optional, tag = "1")]
+    pub query_id: ::core::option::Option<StreamingQueryInstanceId>,
+    #[prost(
+        oneof = "streaming_query_command_result::ResultType",
+        tags = "2, 3, 4, 5, 6"
+    )]
+    pub result_type: ::core::option::Option<streaming_query_command_result::ResultType>,
+}
+/// Nested message and enum types in `StreamingQueryCommandResult`.
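A hand-written sketch (not generated output) of issuing `stop()` against a running query, using the `(id, run_id)` pair returned in `WriteStreamOperationStartResult`:

```rust
fn stop_query(id: String, run_id: String) -> StreamingQueryCommand {
    StreamingQueryCommand {
        query_id: Some(StreamingQueryInstanceId { id, run_id }),
        // The Stop variant of the streaming_query_command::Command oneof.
        command: Some(streaming_query_command::Command::Stop(true)),
    }
}
```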
+pub mod streaming_query_command_result { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct StatusResult { + /// See documentation for these Scala 'StreamingQueryStatus' struct + #[prost(string, tag = "1")] + pub status_message: ::prost::alloc::string::String, + #[prost(bool, tag = "2")] + pub is_data_available: bool, + #[prost(bool, tag = "3")] + pub is_trigger_active: bool, + #[prost(bool, tag = "4")] + pub is_active: bool, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct RecentProgressResult { + /// Progress reports as an array of json strings. + #[prost(string, repeated, tag = "5")] + pub recent_progress_json: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct ExplainResult { + /// Logical and physical plans as string + #[prost(string, tag = "1")] + pub result: ::prost::alloc::string::String, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct ExceptionResult { + /// (Optional) Exception message as string, maps to the return value of original + /// StreamingQueryException's toString method + #[prost(string, optional, tag = "1")] + pub exception_message: ::core::option::Option<::prost::alloc::string::String>, + /// (Optional) Exception error class as string + #[prost(string, optional, tag = "2")] + pub error_class: ::core::option::Option<::prost::alloc::string::String>, + /// (Optional) Exception stack trace as string + #[prost(string, optional, tag = "3")] + pub stack_trace: ::core::option::Option<::prost::alloc::string::String>, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct AwaitTerminationResult { + #[prost(bool, tag = "1")] + pub terminated: bool, + } + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum ResultType { + #[prost(message, tag = "2")] + Status(StatusResult), + #[prost(message, tag = "3")] + RecentProgress(RecentProgressResult), + #[prost(message, tag = "4")] + Explain(ExplainResult), + #[prost(message, tag = "5")] + Exception(ExceptionResult), + #[prost(message, tag = "6")] + AwaitTermination(AwaitTerminationResult), + } +} +/// Commands for the streaming query manager. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StreamingQueryManagerCommand { + /// See documentation for the corresponding API method in StreamingQueryManager. + #[prost( + oneof = "streaming_query_manager_command::Command", + tags = "1, 2, 3, 4, 5, 6, 7" + )] + pub command: ::core::option::Option, +} +/// Nested message and enum types in `StreamingQueryManagerCommand`. +pub mod streaming_query_manager_command { + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct AwaitAnyTerminationCommand { + /// (Optional) The waiting time in milliseconds to wait for any query to terminate. + #[prost(int64, optional, tag = "1")] + pub timeout_ms: ::core::option::Option, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct StreamingQueryListenerCommand { + #[prost(bytes = "vec", tag = "1")] + pub listener_payload: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "2")] + pub python_listener_payload: ::core::option::Option, + #[prost(string, tag = "3")] + pub id: ::prost::alloc::string::String, + } + /// See documentation for the corresponding API method in StreamingQueryManager. + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Command { + /// active() API, returns a list of active queries. + #[prost(bool, tag = "1")] + Active(bool), + /// get() API, returns the StreamingQuery identified by id. 
+ #[prost(string, tag = "2")] + GetQuery(::prost::alloc::string::String), + /// awaitAnyTermination() API, wait until any query terminates or timeout. + #[prost(message, tag = "3")] + AwaitAnyTermination(AwaitAnyTerminationCommand), + /// resetTerminated() API. + #[prost(bool, tag = "4")] + ResetTerminated(bool), + /// addListener API. + #[prost(message, tag = "5")] + AddListener(StreamingQueryListenerCommand), + /// removeListener API. + #[prost(message, tag = "6")] + RemoveListener(StreamingQueryListenerCommand), + /// listListeners() API, returns a list of streaming query listeners. + #[prost(bool, tag = "7")] + ListListeners(bool), + } +} +/// Response for commands on the streaming query manager. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StreamingQueryManagerCommandResult { + #[prost( + oneof = "streaming_query_manager_command_result::ResultType", + tags = "1, 2, 3, 4, 5, 6, 7" + )] + pub result_type: ::core::option::Option, +} +/// Nested message and enum types in `StreamingQueryManagerCommandResult`. +pub mod streaming_query_manager_command_result { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct ActiveResult { + #[prost(message, repeated, tag = "1")] + pub active_queries: ::prost::alloc::vec::Vec, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct StreamingQueryInstance { + /// (Required) The id and runId of this query. + #[prost(message, optional, tag = "1")] + pub id: ::core::option::Option, + /// (Optional) The name of this query. + #[prost(string, optional, tag = "2")] + pub name: ::core::option::Option<::prost::alloc::string::String>, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct AwaitAnyTerminationResult { + #[prost(bool, tag = "1")] + pub terminated: bool, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct StreamingQueryListenerInstance { + #[prost(bytes = "vec", tag = "1")] + pub listener_payload: ::prost::alloc::vec::Vec, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct ListStreamingQueryListenerResult { + /// (Required) Reference IDs of listener instances. + #[prost(string, repeated, tag = "1")] + pub listener_ids: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + } + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum ResultType { + #[prost(message, tag = "1")] + Active(ActiveResult), + #[prost(message, tag = "2")] + Query(StreamingQueryInstance), + #[prost(message, tag = "3")] + AwaitAnyTermination(AwaitAnyTerminationResult), + #[prost(bool, tag = "4")] + ResetTerminated(bool), + #[prost(bool, tag = "5")] + AddListener(bool), + #[prost(bool, tag = "6")] + RemoveListener(bool), + #[prost(message, tag = "7")] + ListListeners(ListStreamingQueryListenerResult), + } +} +/// The protocol for client-side StreamingQueryListener. +/// This command will only be set when either the first listener is added to the client, or the last +/// listener is removed from the client. +/// The add_listener_bus_listener command will only be set true in the first case. +/// The remove_listener_bus_listener command will only be set true in the second case. +#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct StreamingQueryListenerBusCommand { + #[prost(oneof = "streaming_query_listener_bus_command::Command", tags = "1, 2")] + pub command: ::core::option::Option, +} +/// Nested message and enum types in `StreamingQueryListenerBusCommand`. 
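A hand-written sketch (not generated output) of asking the manager for the active queries and reading the names back from the result oneof. It assumes the elided element type of `active_queries` is `StreamingQueryInstance`.

```rust
fn active_queries_command() -> StreamingQueryManagerCommand {
    StreamingQueryManagerCommand {
        command: Some(streaming_query_manager_command::Command::Active(true)),
    }
}

fn active_query_names(result: StreamingQueryManagerCommandResult) -> Vec<String> {
    match result.result_type {
        Some(streaming_query_manager_command_result::ResultType::Active(active)) => active
            .active_queries
            .into_iter()
            .filter_map(|q| q.name) // the query name is optional
            .collect(),
        _ => Vec::new(),
    }
}
```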
+pub mod streaming_query_listener_bus_command { + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] + pub enum Command { + #[prost(bool, tag = "1")] + AddListenerBusListener(bool), + #[prost(bool, tag = "2")] + RemoveListenerBusListener(bool), + } +} +/// The protocol for the returned events in the long-running response channel. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StreamingQueryListenerEvent { + /// (Required) The json serialized event, all StreamingQueryListener events have a json method + #[prost(string, tag = "1")] + pub event_json: ::prost::alloc::string::String, + /// (Required) Query event type used by client to decide how to deserialize the event_json + #[prost(enumeration = "StreamingQueryEventType", tag = "2")] + pub event_type: i32, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct StreamingQueryListenerEventsResult { + #[prost(message, repeated, tag = "1")] + pub events: ::prost::alloc::vec::Vec, + #[prost(bool, optional, tag = "2")] + pub listener_bus_listener_added: ::core::option::Option, +} +/// Command to get the output of 'SparkContext.resources' +#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct GetResourcesCommand {} +/// Response for command 'GetResourcesCommand'. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetResourcesCommandResult { + #[prost(map = "string, message", tag = "1")] + pub resources: ::std::collections::HashMap<::prost::alloc::string::String, ResourceInformation>, +} +/// Command to create ResourceProfile +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CreateResourceProfileCommand { + /// (Required) The ResourceProfile to be built on the server-side. + #[prost(message, optional, tag = "1")] + pub profile: ::core::option::Option, +} +/// Response for command 'CreateResourceProfileCommand'. +#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct CreateResourceProfileCommandResult { + /// (Required) Server-side generated resource profile id. + #[prost(int32, tag = "1")] + pub profile_id: i32, +} +/// Command to remove `CashedRemoteRelation` +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RemoveCachedRemoteRelationCommand { + /// (Required) The remote to be related + #[prost(message, optional, tag = "1")] + pub relation: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CheckpointCommand { + /// (Required) The logical plan to checkpoint. + #[prost(message, optional, tag = "1")] + pub relation: ::core::option::Option, + /// (Required) Locally checkpoint using a local temporary + /// directory in Spark Connect server (Spark Driver) + #[prost(bool, tag = "2")] + pub local: bool, + /// (Required) Whether to checkpoint this dataframe immediately. + #[prost(bool, tag = "3")] + pub eager: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct MergeIntoTableCommand { + /// (Required) The name of the target table. + #[prost(string, tag = "1")] + pub target_table_name: ::prost::alloc::string::String, + /// (Required) The relation of the source table. + #[prost(message, optional, tag = "2")] + pub source_table_plan: ::core::option::Option, + /// (Required) The condition to match the source and target. + #[prost(message, optional, tag = "3")] + pub merge_condition: ::core::option::Option, + /// (Optional) The actions to be taken when the condition is matched. + #[prost(message, repeated, tag = "4")] + pub match_actions: ::prost::alloc::vec::Vec, + /// (Optional) The actions to be taken when the condition is not matched. 
+ #[prost(message, repeated, tag = "5")] + pub not_matched_actions: ::prost::alloc::vec::Vec, + /// (Optional) The actions to be taken when the condition is not matched by source. + #[prost(message, repeated, tag = "6")] + pub not_matched_by_source_actions: ::prost::alloc::vec::Vec, + /// (Required) Whether to enable schema evolution. + #[prost(bool, tag = "7")] + pub with_schema_evolution: bool, +} +/// The enum used for client side streaming query listener event +/// There is no QueryStartedEvent defined here, +/// it is added as a field in WriteStreamOperationStartResult +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum StreamingQueryEventType { + QueryProgressUnspecified = 0, + QueryProgressEvent = 1, + QueryTerminatedEvent = 2, + QueryIdleEvent = 3, +} +impl StreamingQueryEventType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::QueryProgressUnspecified => "QUERY_PROGRESS_UNSPECIFIED", + Self::QueryProgressEvent => "QUERY_PROGRESS_EVENT", + Self::QueryTerminatedEvent => "QUERY_TERMINATED_EVENT", + Self::QueryIdleEvent => "QUERY_IDLE_EVENT", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "QUERY_PROGRESS_UNSPECIFIED" => Some(Self::QueryProgressUnspecified), + "QUERY_PROGRESS_EVENT" => Some(Self::QueryProgressEvent), + "QUERY_TERMINATED_EVENT" => Some(Self::QueryTerminatedEvent), + "QUERY_IDLE_EVENT" => Some(Self::QueryIdleEvent), + _ => None, + } + } +} +/// A \[[Plan]\] is the structure that carries the runtime information for the execution from the +/// client to the server. A \[[Plan]\] can either be of the type \[[Relation]\] which is a reference +/// to the underlying logical plan or it can be of the \[[Command]\] type that is used to execute +/// commands on the server. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Plan { + #[prost(oneof = "plan::OpType", tags = "1, 2")] + pub op_type: ::core::option::Option, +} +/// Nested message and enum types in `Plan`. +pub mod plan { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum OpType { + #[prost(message, tag = "1")] + Root(super::Relation), + #[prost(message, tag = "2")] + Command(super::Command), + } +} +/// User Context is used to refer to one particular user session that is executing +/// queries in the backend. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct UserContext { + #[prost(string, tag = "1")] + pub user_id: ::prost::alloc::string::String, + #[prost(string, tag = "2")] + pub user_name: ::prost::alloc::string::String, + /// To extend the existing user context message that is used to identify incoming requests, + /// Spark Connect leverages the Any protobuf type that can be used to inject arbitrary other + /// messages into this message. Extensions are stored as a `repeated` type to be able to + /// handle multiple active extensions. + #[prost(message, repeated, tag = "999")] + pub extensions: ::prost::alloc::vec::Vec<::prost_types::Any>, +} +/// Request to perform plan analyze, optionally to explain the plan. 
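A hand-written sketch (not generated output) of the two ways a `Plan` is populated, mirroring the `plan::OpType` oneof above: a query plan wraps a `Relation`, a command plan wraps a `Command`. It assumes the elided type parameter on `op_type` is `plan::OpType`.

```rust
fn query_plan(root: Relation) -> Plan {
    Plan {
        op_type: Some(plan::OpType::Root(root)),
    }
}

fn command_plan(command: Command) -> Plan {
    Plan {
        op_type: Some(plan::OpType::Command(command)),
    }
}
```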
+#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AnalyzePlanRequest { + /// (Required) + /// + /// The session_id specifies a spark session for a user id (which is specified + /// by user_context.user_id). The session_id is set by the client to be able to + /// collate streaming responses from different queries within the dedicated session. + /// The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// (Optional) + /// + /// Server-side generated idempotency key from the previous responses (if any). Server + /// can use this to validate that the server side session has not changed. + #[prost(string, optional, tag = "17")] + pub client_observed_server_side_session_id: + ::core::option::Option<::prost::alloc::string::String>, + /// (Required) User context + #[prost(message, optional, tag = "2")] + pub user_context: ::core::option::Option, + /// Provides optional information about the client sending the request. This field + /// can be used for language or version specific information and is only intended for + /// logging purposes and will not be interpreted by the server. + #[prost(string, optional, tag = "3")] + pub client_type: ::core::option::Option<::prost::alloc::string::String>, + #[prost( + oneof = "analyze_plan_request::Analyze", + tags = "4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16" + )] + pub analyze: ::core::option::Option, +} +/// Nested message and enum types in `AnalyzePlanRequest`. +pub mod analyze_plan_request { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Schema { + /// (Required) The logical plan to be analyzed. + #[prost(message, optional, tag = "1")] + pub plan: ::core::option::Option, + } + /// Explains the input plan based on a configurable mode. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Explain { + /// (Required) The logical plan to be analyzed. + #[prost(message, optional, tag = "1")] + pub plan: ::core::option::Option, + /// (Required) For analyzePlan rpc calls, configure the mode to explain plan in strings. + #[prost(enumeration = "explain::ExplainMode", tag = "2")] + pub explain_mode: i32, + } + /// Nested message and enum types in `Explain`. + pub mod explain { + /// Plan explanation mode. + #[derive( + Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, + )] + #[repr(i32)] + pub enum ExplainMode { + Unspecified = 0, + /// Generates only physical plan. + Simple = 1, + /// Generates parsed logical plan, analyzed logical plan, optimized logical plan and physical plan. + /// Parsed Logical plan is a unresolved plan that extracted from the query. Analyzed logical plans + /// transforms which translates unresolvedAttribute and unresolvedRelation into fully typed objects. + /// The optimized logical plan transforms through a set of optimization rules, resulting in the + /// physical plan. + Extended = 2, + /// Generates code for the statement, if any and a physical plan. + Codegen = 3, + /// If plan node statistics are available, generates a logical plan and also the statistics. + Cost = 4, + /// Generates a physical plan outline and also node details. + Formatted = 5, + } + impl ExplainMode { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
+ pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "EXPLAIN_MODE_UNSPECIFIED", + Self::Simple => "EXPLAIN_MODE_SIMPLE", + Self::Extended => "EXPLAIN_MODE_EXTENDED", + Self::Codegen => "EXPLAIN_MODE_CODEGEN", + Self::Cost => "EXPLAIN_MODE_COST", + Self::Formatted => "EXPLAIN_MODE_FORMATTED", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "EXPLAIN_MODE_UNSPECIFIED" => Some(Self::Unspecified), + "EXPLAIN_MODE_SIMPLE" => Some(Self::Simple), + "EXPLAIN_MODE_EXTENDED" => Some(Self::Extended), + "EXPLAIN_MODE_CODEGEN" => Some(Self::Codegen), + "EXPLAIN_MODE_COST" => Some(Self::Cost), + "EXPLAIN_MODE_FORMATTED" => Some(Self::Formatted), + _ => None, + } + } + } + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct TreeString { + /// (Required) The logical plan to be analyzed. + #[prost(message, optional, tag = "1")] + pub plan: ::core::option::Option, + /// (Optional) Max level of the schema. + #[prost(int32, optional, tag = "2")] + pub level: ::core::option::Option, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct IsLocal { + /// (Required) The logical plan to be analyzed. + #[prost(message, optional, tag = "1")] + pub plan: ::core::option::Option, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct IsStreaming { + /// (Required) The logical plan to be analyzed. + #[prost(message, optional, tag = "1")] + pub plan: ::core::option::Option, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct InputFiles { + /// (Required) The logical plan to be analyzed. + #[prost(message, optional, tag = "1")] + pub plan: ::core::option::Option, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct SparkVersion {} + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct DdlParse { + /// (Required) The DDL formatted string to be parsed. + #[prost(string, tag = "1")] + pub ddl_string: ::prost::alloc::string::String, + } + /// Returns `true` when the logical query plans are equal and therefore return same results. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct SameSemantics { + /// (Required) The plan to be compared. + #[prost(message, optional, tag = "1")] + pub target_plan: ::core::option::Option, + /// (Required) The other plan to be compared. + #[prost(message, optional, tag = "2")] + pub other_plan: ::core::option::Option, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct SemanticHash { + /// (Required) The logical plan to get a hashCode. + #[prost(message, optional, tag = "1")] + pub plan: ::core::option::Option, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Persist { + /// (Required) The logical plan to persist. + #[prost(message, optional, tag = "1")] + pub relation: ::core::option::Option, + /// (Optional) The storage level. + #[prost(message, optional, tag = "2")] + pub storage_level: ::core::option::Option, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Unpersist { + /// (Required) The logical plan to unpersist. + #[prost(message, optional, tag = "1")] + pub relation: ::core::option::Option, + /// (Optional) Whether to block until all blocks are deleted. + #[prost(bool, optional, tag = "2")] + pub blocking: ::core::option::Option, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct GetStorageLevel { + /// (Required) The logical plan to get the storage level. 
+ #[prost(message, optional, tag = "1")] + pub relation: ::core::option::Option, + } + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Analyze { + #[prost(message, tag = "4")] + Schema(Schema), + #[prost(message, tag = "5")] + Explain(Explain), + #[prost(message, tag = "6")] + TreeString(TreeString), + #[prost(message, tag = "7")] + IsLocal(IsLocal), + #[prost(message, tag = "8")] + IsStreaming(IsStreaming), + #[prost(message, tag = "9")] + InputFiles(InputFiles), + #[prost(message, tag = "10")] + SparkVersion(SparkVersion), + #[prost(message, tag = "11")] + DdlParse(DdlParse), + #[prost(message, tag = "12")] + SameSemantics(SameSemantics), + #[prost(message, tag = "13")] + SemanticHash(SemanticHash), + #[prost(message, tag = "14")] + Persist(Persist), + #[prost(message, tag = "15")] + Unpersist(Unpersist), + #[prost(message, tag = "16")] + GetStorageLevel(GetStorageLevel), + } +} +/// Response to performing analysis of the query. Contains relevant metadata to be able to +/// reason about the performance. +/// Next ID: 16 +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AnalyzePlanResponse { + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// Server-side generated idempotency key that the client can use to assert that the server side + /// session has not changed. + #[prost(string, tag = "15")] + pub server_side_session_id: ::prost::alloc::string::String, + #[prost( + oneof = "analyze_plan_response::Result", + tags = "2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14" + )] + pub result: ::core::option::Option, +} +/// Nested message and enum types in `AnalyzePlanResponse`. +pub mod analyze_plan_response { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Schema { + #[prost(message, optional, tag = "1")] + pub schema: ::core::option::Option, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Explain { + #[prost(string, tag = "1")] + pub explain_string: ::prost::alloc::string::String, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct TreeString { + #[prost(string, tag = "1")] + pub tree_string: ::prost::alloc::string::String, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct IsLocal { + #[prost(bool, tag = "1")] + pub is_local: bool, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct IsStreaming { + #[prost(bool, tag = "1")] + pub is_streaming: bool, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct InputFiles { + /// A best-effort snapshot of the files that compose this Dataset + #[prost(string, repeated, tag = "1")] + pub files: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct SparkVersion { + #[prost(string, tag = "1")] + pub version: ::prost::alloc::string::String, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct DdlParse { + #[prost(message, optional, tag = "1")] + pub parsed: ::core::option::Option, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct SameSemantics { + #[prost(bool, tag = "1")] + pub result: bool, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct SemanticHash { + #[prost(int32, tag = "1")] + pub result: i32, + } + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Persist {} + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct Unpersist {} + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct GetStorageLevel { + /// (Required) The StorageLevel as a result of 
get_storage_level request. + #[prost(message, optional, tag = "1")] + pub storage_level: ::core::option::Option, + } + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Result { + #[prost(message, tag = "2")] + Schema(Schema), + #[prost(message, tag = "3")] + Explain(Explain), + #[prost(message, tag = "4")] + TreeString(TreeString), + #[prost(message, tag = "5")] + IsLocal(IsLocal), + #[prost(message, tag = "6")] + IsStreaming(IsStreaming), + #[prost(message, tag = "7")] + InputFiles(InputFiles), + #[prost(message, tag = "8")] + SparkVersion(SparkVersion), + #[prost(message, tag = "9")] + DdlParse(DdlParse), + #[prost(message, tag = "10")] + SameSemantics(SameSemantics), + #[prost(message, tag = "11")] + SemanticHash(SemanticHash), + #[prost(message, tag = "12")] + Persist(Persist), + #[prost(message, tag = "13")] + Unpersist(Unpersist), + #[prost(message, tag = "14")] + GetStorageLevel(GetStorageLevel), + } +} +/// A request to be executed by the service. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutePlanRequest { + /// (Required) + /// + /// The session_id specifies a spark session for a user id (which is specified + /// by user_context.user_id). The session_id is set by the client to be able to + /// collate streaming responses from different queries within the dedicated session. + /// The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// (Optional) + /// + /// Server-side generated idempotency key from the previous responses (if any). Server + /// can use this to validate that the server side session has not changed. + #[prost(string, optional, tag = "8")] + pub client_observed_server_side_session_id: + ::core::option::Option<::prost::alloc::string::String>, + /// (Required) User context + /// + /// user_context.user_id and session+id both identify a unique remote spark session on the + /// server side. + #[prost(message, optional, tag = "2")] + pub user_context: ::core::option::Option, + /// (Optional) + /// Provide an id for this request. If not provided, it will be generated by the server. + /// It is returned in every ExecutePlanResponse.operation_id of the ExecutePlan response stream. + /// The id must be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + #[prost(string, optional, tag = "6")] + pub operation_id: ::core::option::Option<::prost::alloc::string::String>, + /// (Required) The logical plan to be executed / analyzed. + #[prost(message, optional, tag = "3")] + pub plan: ::core::option::Option, + /// Provides optional information about the client sending the request. This field + /// can be used for language or version specific information and is only intended for + /// logging purposes and will not be interpreted by the server. + #[prost(string, optional, tag = "4")] + pub client_type: ::core::option::Option<::prost::alloc::string::String>, + /// Repeated element for options that can be passed to the request. This element is currently + /// unused but allows to pass in an extension value used for arbitrary options. + #[prost(message, repeated, tag = "5")] + pub request_options: ::prost::alloc::vec::Vec, + /// Tags to tag the given execution with. + /// Tags cannot contain ',' character and cannot be empty strings. + /// Used by Interrupt with interrupt.tag. 
+ #[prost(string, repeated, tag = "7")] + pub tags: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +/// Nested message and enum types in `ExecutePlanRequest`. +pub mod execute_plan_request { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct RequestOption { + #[prost(oneof = "request_option::RequestOption", tags = "1, 999")] + pub request_option: ::core::option::Option, + } + /// Nested message and enum types in `RequestOption`. + pub mod request_option { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum RequestOption { + #[prost(message, tag = "1")] + ReattachOptions(super::super::ReattachOptions), + /// Extension type for request options + #[prost(message, tag = "999")] + Extension(::prost_types::Any), + } + } +} +/// The response of a query, can be one or more for each request. Responses belonging to the +/// same input query, carry the same `session_id`. +/// Next ID: 17 +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutePlanResponse { + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// Server-side generated idempotency key that the client can use to assert that the server side + /// session has not changed. + #[prost(string, tag = "15")] + pub server_side_session_id: ::prost::alloc::string::String, + /// Identifies the ExecutePlan execution. + /// If set by the client in ExecutePlanRequest.operationId, that value is returned. + /// Otherwise generated by the server. + /// It is an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + #[prost(string, tag = "12")] + pub operation_id: ::prost::alloc::string::String, + /// Identified the response in the stream. + /// The id is an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + #[prost(string, tag = "13")] + pub response_id: ::prost::alloc::string::String, + /// Metrics for the query execution. Typically, this field is only present in the last + /// batch of results and then represent the overall state of the query execution. + #[prost(message, optional, tag = "4")] + pub metrics: ::core::option::Option, + /// The metrics observed during the execution of the query plan. + #[prost(message, repeated, tag = "6")] + pub observed_metrics: ::prost::alloc::vec::Vec, + /// (Optional) The Spark schema. This field is available when `collect` is called. + #[prost(message, optional, tag = "7")] + pub schema: ::core::option::Option, + /// Union type for the different response messages. + #[prost( + oneof = "execute_plan_response::ResponseType", + tags = "2, 5, 8, 9, 10, 11, 16, 14, 17, 18, 19, 999" + )] + pub response_type: ::core::option::Option, +} +/// Nested message and enum types in `ExecutePlanResponse`. +pub mod execute_plan_response { + /// A SQL command returns an opaque Relation that can be directly used as input for the next + /// call. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct SqlCommandResult { + #[prost(message, optional, tag = "1")] + pub relation: ::core::option::Option, + } + /// Batch results of metrics. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct ArrowBatch { + /// Count rows in `data`. Must match the number of rows inside `data`. + #[prost(int64, tag = "1")] + pub row_count: i64, + /// Serialized Arrow data. + #[prost(bytes = "vec", tag = "2")] + pub data: ::prost::alloc::vec::Vec, + /// If set, row offset of the start of this ArrowBatch in execution results. 
+ #[prost(int64, optional, tag = "3")] + pub start_offset: ::core::option::Option, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Metrics { + #[prost(message, repeated, tag = "1")] + pub metrics: ::prost::alloc::vec::Vec, + } + /// Nested message and enum types in `Metrics`. + pub mod metrics { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct MetricObject { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + #[prost(int64, tag = "2")] + pub plan_id: i64, + #[prost(int64, tag = "3")] + pub parent: i64, + #[prost(map = "string, message", tag = "4")] + pub execution_metrics: + ::std::collections::HashMap<::prost::alloc::string::String, MetricValue>, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct MetricValue { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + #[prost(int64, tag = "2")] + pub value: i64, + #[prost(string, tag = "3")] + pub metric_type: ::prost::alloc::string::String, + } + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct ObservedMetrics { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + #[prost(message, repeated, tag = "2")] + pub values: ::prost::alloc::vec::Vec, + #[prost(string, repeated, tag = "3")] + pub keys: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(int64, tag = "4")] + pub plan_id: i64, + } + /// If present, in a reattachable execution this means that after server sends onComplete, + /// the execution is complete. If the server sends onComplete without sending a ResultComplete, + /// it means that there is more, and the client should use ReattachExecute RPC to continue. + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct ResultComplete {} + /// This message is used to communicate progress about the query progress during the execution. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct ExecutionProgress { + /// Captures the progress of each individual stage. + #[prost(message, repeated, tag = "1")] + pub stages: ::prost::alloc::vec::Vec, + /// Captures the currently in progress tasks. + #[prost(int64, tag = "2")] + pub num_inflight_tasks: i64, + } + /// Nested message and enum types in `ExecutionProgress`. + pub mod execution_progress { + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct StageInfo { + #[prost(int64, tag = "1")] + pub stage_id: i64, + #[prost(int64, tag = "2")] + pub num_tasks: i64, + #[prost(int64, tag = "3")] + pub num_completed_tasks: i64, + #[prost(int64, tag = "4")] + pub input_bytes_read: i64, + #[prost(bool, tag = "5")] + pub done: bool, + } + } + /// Union type for the different response messages. + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum ResponseType { + #[prost(message, tag = "2")] + ArrowBatch(ArrowBatch), + /// Special case for executing SQL commands. + #[prost(message, tag = "5")] + SqlCommandResult(SqlCommandResult), + /// Response for a streaming query. + #[prost(message, tag = "8")] + WriteStreamOperationStartResult(super::WriteStreamOperationStartResult), + /// Response for commands on a streaming query. + #[prost(message, tag = "9")] + StreamingQueryCommandResult(super::StreamingQueryCommandResult), + /// Response for 'SparkContext.resources'. + #[prost(message, tag = "10")] + GetResourcesCommandResult(super::GetResourcesCommandResult), + /// Response for commands on the streaming query manager. 
+ #[prost(message, tag = "11")] + StreamingQueryManagerCommandResult(super::StreamingQueryManagerCommandResult), + /// Response for commands on the client side streaming query listener. + #[prost(message, tag = "16")] + StreamingQueryListenerEventsResult(super::StreamingQueryListenerEventsResult), + /// Response type informing if the stream is complete in reattachable execution. + #[prost(message, tag = "14")] + ResultComplete(ResultComplete), + /// Response for command that creates ResourceProfile. + #[prost(message, tag = "17")] + CreateResourceProfileCommandResult(super::CreateResourceProfileCommandResult), + /// (Optional) Intermediate query progress reports. + #[prost(message, tag = "18")] + ExecutionProgress(ExecutionProgress), + /// Response for command that checkpoints a DataFrame. + #[prost(message, tag = "19")] + CheckpointCommandResult(super::CheckpointCommandResult), + /// Support arbitrary result objects. + #[prost(message, tag = "999")] + Extension(::prost_types::Any), + } +} +/// The key-value pair for the config request and response. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct KeyValue { + /// (Required) The key. + #[prost(string, tag = "1")] + pub key: ::prost::alloc::string::String, + /// (Optional) The value. + #[prost(string, optional, tag = "2")] + pub value: ::core::option::Option<::prost::alloc::string::String>, +} +/// Request to update or fetch the configurations. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ConfigRequest { + /// (Required) + /// + /// The session_id specifies a spark session for a user id (which is specified + /// by user_context.user_id). The session_id is set by the client to be able to + /// collate streaming responses from different queries within the dedicated session. + /// The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// (Optional) + /// + /// Server-side generated idempotency key from the previous responses (if any). Server + /// can use this to validate that the server side session has not changed. + #[prost(string, optional, tag = "8")] + pub client_observed_server_side_session_id: + ::core::option::Option<::prost::alloc::string::String>, + /// (Required) User context + #[prost(message, optional, tag = "2")] + pub user_context: ::core::option::Option, + /// (Required) The operation for the config. + #[prost(message, optional, tag = "3")] + pub operation: ::core::option::Option, + /// Provides optional information about the client sending the request. This field + /// can be used for language or version specific information and is only intended for + /// logging purposes and will not be interpreted by the server. + #[prost(string, optional, tag = "4")] + pub client_type: ::core::option::Option<::prost::alloc::string::String>, +} +/// Nested message and enum types in `ConfigRequest`. +pub mod config_request { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Operation { + #[prost(oneof = "operation::OpType", tags = "1, 2, 3, 4, 5, 6, 7")] + pub op_type: ::core::option::Option, + } + /// Nested message and enum types in `Operation`. 
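Editor's note: as an illustration of how the config types above compose, here is a minimal sketch of a ConfigRequest that sets a single configuration value. It assumes the generated module is imported as `spark_connect` (the actual crate path is not part of this diff) and uses the `Operation`, `Set`, and `OpType` types defined in the `operation` module just below.

use spark_connect::config_request::{operation::OpType, Operation, Set};
use spark_connect::{ConfigRequest, KeyValue};

// Build a ConfigRequest that sets spark.sql.shuffle.partitions for a session.
fn set_shuffle_partitions(session_id: &str) -> ConfigRequest {
    ConfigRequest {
        session_id: session_id.to_string(),
        operation: Some(Operation {
            op_type: Some(OpType::Set(Set {
                pairs: vec![KeyValue {
                    key: "spark.sql.shuffle.partitions".to_string(),
                    value: Some("8".to_string()),
                }],
            })),
        }),
        // user_context, client_type and the idempotency key are left at their defaults here.
        ..Default::default()
    }
}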
+ pub mod operation { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum OpType { + #[prost(message, tag = "1")] + Set(super::Set), + #[prost(message, tag = "2")] + Get(super::Get), + #[prost(message, tag = "3")] + GetWithDefault(super::GetWithDefault), + #[prost(message, tag = "4")] + GetOption(super::GetOption), + #[prost(message, tag = "5")] + GetAll(super::GetAll), + #[prost(message, tag = "6")] + Unset(super::Unset), + #[prost(message, tag = "7")] + IsModifiable(super::IsModifiable), + } + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Set { + /// (Required) The config key-value pairs to set. + #[prost(message, repeated, tag = "1")] + pub pairs: ::prost::alloc::vec::Vec, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Get { + /// (Required) The config keys to get. + #[prost(string, repeated, tag = "1")] + pub keys: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct GetWithDefault { + /// (Required) The config key-value pairs to get. The value will be used as the default value. + #[prost(message, repeated, tag = "1")] + pub pairs: ::prost::alloc::vec::Vec, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct GetOption { + /// (Required) The config keys to get optionally. + #[prost(string, repeated, tag = "1")] + pub keys: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct GetAll { + /// (Optional) The prefix of the config key to get. + #[prost(string, optional, tag = "1")] + pub prefix: ::core::option::Option<::prost::alloc::string::String>, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Unset { + /// (Required) The config keys to unset. + #[prost(string, repeated, tag = "1")] + pub keys: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + } + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct IsModifiable { + /// (Required) The config keys to check the config is modifiable. + #[prost(string, repeated, tag = "1")] + pub keys: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + } +} +/// Response to the config request. +/// Next ID: 5 +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ConfigResponse { + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// Server-side generated idempotency key that the client can use to assert that the server side + /// session has not changed. + #[prost(string, tag = "4")] + pub server_side_session_id: ::prost::alloc::string::String, + /// (Optional) The result key-value pairs. + /// + /// Available when the operation is 'Get', 'GetWithDefault', 'GetOption', 'GetAll'. + /// Also available for the operation 'IsModifiable' with boolean string "true" and "false". + #[prost(message, repeated, tag = "2")] + pub pairs: ::prost::alloc::vec::Vec, + /// (Optional) + /// + /// Warning messages for deprecated or unsupported configurations. + #[prost(string, repeated, tag = "3")] + pub warnings: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +/// Request to transfer client-local artifacts. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AddArtifactsRequest { + /// (Required) + /// + /// The session_id specifies a spark session for a user id (which is specified + /// by user_context.user_id). The session_id is set by the client to be able to + /// collate streaming responses from different queries within the dedicated session. 
+ /// The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// User context + #[prost(message, optional, tag = "2")] + pub user_context: ::core::option::Option, + /// (Optional) + /// + /// Server-side generated idempotency key from the previous responses (if any). Server + /// can use this to validate that the server side session has not changed. + #[prost(string, optional, tag = "7")] + pub client_observed_server_side_session_id: + ::core::option::Option<::prost::alloc::string::String>, + /// Provides optional information about the client sending the request. This field + /// can be used for language or version specific information and is only intended for + /// logging purposes and will not be interpreted by the server. + #[prost(string, optional, tag = "6")] + pub client_type: ::core::option::Option<::prost::alloc::string::String>, + /// The payload is either a batch of artifacts or a partial chunk of a large artifact. + #[prost(oneof = "add_artifacts_request::Payload", tags = "3, 4, 5")] + pub payload: ::core::option::Option, +} +/// Nested message and enum types in `AddArtifactsRequest`. +pub mod add_artifacts_request { + /// A chunk of an Artifact. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct ArtifactChunk { + /// Data chunk. + #[prost(bytes = "vec", tag = "1")] + pub data: ::prost::alloc::vec::Vec, + /// CRC to allow server to verify integrity of the chunk. + #[prost(int64, tag = "2")] + pub crc: i64, + } + /// An artifact that is contained in a single `ArtifactChunk`. + /// Generally, this message represents tiny artifacts such as REPL-generated class files. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct SingleChunkArtifact { + /// The name of the artifact is expected in the form of a "Relative Path" that is made up of a + /// sequence of directories and the final file element. + /// Examples of "Relative Path"s: "jars/test.jar", "classes/xyz.class", "abc.xyz", "a/b/X.jar". + /// The server is expected to maintain the hierarchy of files as defined by their name. (i.e + /// The relative path of the file on the server's filesystem will be the same as the name of + /// the provided artifact) + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + /// A single data chunk. + #[prost(message, optional, tag = "2")] + pub data: ::core::option::Option, + } + /// A number of `SingleChunkArtifact` batched into a single RPC. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Batch { + #[prost(message, repeated, tag = "1")] + pub artifacts: ::prost::alloc::vec::Vec, + } + /// Signals the beginning/start of a chunked artifact. + /// A large artifact is transferred through a payload of `BeginChunkedArtifact` followed by a + /// sequence of `ArtifactChunk`s. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct BeginChunkedArtifact { + /// Name of the artifact undergoing chunking. Follows the same conventions as the `name` in + /// the `Artifact` message. + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + /// Total size of the artifact in bytes. + #[prost(int64, tag = "2")] + pub total_bytes: i64, + /// Number of chunks the artifact is split into. + /// This includes the `initial_chunk`. + #[prost(int64, tag = "3")] + pub num_chunks: i64, + /// The first/initial chunk. 
+ #[prost(message, optional, tag = "4")] + pub initial_chunk: ::core::option::Option, + } + /// The payload is either a batch of artifacts or a partial chunk of a large artifact. + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Payload { + #[prost(message, tag = "3")] + Batch(Batch), + /// The metadata and the initial chunk of a large artifact chunked into multiple requests. + /// The server side is notified about the total size of the large artifact as well as the + /// number of chunks to expect. + #[prost(message, tag = "4")] + BeginChunk(BeginChunkedArtifact), + /// A chunk of an artifact excluding metadata. This can be any chunk of a large artifact + /// excluding the first chunk (which is included in `BeginChunkedArtifact`). + #[prost(message, tag = "5")] + Chunk(ArtifactChunk), + } +} +/// Response to adding an artifact. Contains relevant metadata to verify successful transfer of +/// artifact(s). +/// Next ID: 4 +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AddArtifactsResponse { + /// Session id in which the AddArtifact was running. + #[prost(string, tag = "2")] + pub session_id: ::prost::alloc::string::String, + /// Server-side generated idempotency key that the client can use to assert that the server side + /// session has not changed. + #[prost(string, tag = "3")] + pub server_side_session_id: ::prost::alloc::string::String, + /// The list of artifact(s) seen by the server. + #[prost(message, repeated, tag = "1")] + pub artifacts: ::prost::alloc::vec::Vec, +} +/// Nested message and enum types in `AddArtifactsResponse`. +pub mod add_artifacts_response { + /// Metadata of an artifact. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct ArtifactSummary { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + /// Whether the CRC (Cyclic Redundancy Check) is successful on server verification. + /// The server discards any artifact that fails the CRC. + /// If false, the client may choose to resend the artifact specified by `name`. + #[prost(bool, tag = "2")] + pub is_crc_successful: bool, + } +} +/// Request to get current statuses of artifacts at the server side. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ArtifactStatusesRequest { + /// (Required) + /// + /// The session_id specifies a spark session for a user id (which is specified + /// by user_context.user_id). The session_id is set by the client to be able to + /// collate streaming responses from different queries within the dedicated session. + /// The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// (Optional) + /// + /// Server-side generated idempotency key from the previous responses (if any). Server + /// can use this to validate that the server side session has not changed. + #[prost(string, optional, tag = "5")] + pub client_observed_server_side_session_id: + ::core::option::Option<::prost::alloc::string::String>, + /// User context + #[prost(message, optional, tag = "2")] + pub user_context: ::core::option::Option, + /// Provides optional information about the client sending the request. This field + /// can be used for language or version specific information and is only intended for + /// logging purposes and will not be interpreted by the server. 
+ #[prost(string, optional, tag = "3")] + pub client_type: ::core::option::Option<::prost::alloc::string::String>, + /// The name of the artifact is expected in the form of a "Relative Path" that is made up of a + /// sequence of directories and the final file element. + /// Examples of "Relative Path"s: "jars/test.jar", "classes/xyz.class", "abc.xyz", "a/b/X.jar". + /// The server is expected to maintain the hierarchy of files as defined by their name. (i.e + /// The relative path of the file on the server's filesystem will be the same as the name of + /// the provided artifact) + #[prost(string, repeated, tag = "4")] + pub names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +/// Response to checking artifact statuses. +/// Next ID: 4 +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ArtifactStatusesResponse { + /// Session id in which the ArtifactStatus was running. + #[prost(string, tag = "2")] + pub session_id: ::prost::alloc::string::String, + /// Server-side generated idempotency key that the client can use to assert that the server side + /// session has not changed. + #[prost(string, tag = "3")] + pub server_side_session_id: ::prost::alloc::string::String, + /// A map of artifact names to their statuses. + #[prost(map = "string, message", tag = "1")] + pub statuses: ::std::collections::HashMap< + ::prost::alloc::string::String, + artifact_statuses_response::ArtifactStatus, + >, +} +/// Nested message and enum types in `ArtifactStatusesResponse`. +pub mod artifact_statuses_response { + #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct ArtifactStatus { + /// Exists or not particular artifact at the server. + #[prost(bool, tag = "1")] + pub exists: bool, + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct InterruptRequest { + /// (Required) + /// + /// The session_id specifies a spark session for a user id (which is specified + /// by user_context.user_id). The session_id is set by the client to be able to + /// collate streaming responses from different queries within the dedicated session. + /// The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// (Optional) + /// + /// Server-side generated idempotency key from the previous responses (if any). Server + /// can use this to validate that the server side session has not changed. + #[prost(string, optional, tag = "7")] + pub client_observed_server_side_session_id: + ::core::option::Option<::prost::alloc::string::String>, + /// (Required) User context + #[prost(message, optional, tag = "2")] + pub user_context: ::core::option::Option, + /// Provides optional information about the client sending the request. This field + /// can be used for language or version specific information and is only intended for + /// logging purposes and will not be interpreted by the server. + #[prost(string, optional, tag = "3")] + pub client_type: ::core::option::Option<::prost::alloc::string::String>, + /// (Required) The type of interrupt to execute. + #[prost(enumeration = "interrupt_request::InterruptType", tag = "4")] + pub interrupt_type: i32, + #[prost(oneof = "interrupt_request::Interrupt", tags = "5, 6")] + pub interrupt: ::core::option::Option, +} +/// Nested message and enum types in `InterruptRequest`. 
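Editor's note: a minimal sketch of an InterruptRequest that cancels all executions carrying a given tag. It relies on the `InterruptType` enum and `Interrupt` oneof defined in the `interrupt_request` module that follows, and again assumes the generated module is imported as `spark_connect`.

use spark_connect::interrupt_request::{Interrupt, InterruptType};
use spark_connect::InterruptRequest;

// Interrupt every running execution in the session that was tagged with `tag`
// (tags are attached to executions via ExecutePlanRequest.tags).
fn interrupt_by_tag(session_id: &str, tag: &str) -> InterruptRequest {
    InterruptRequest {
        session_id: session_id.to_string(),
        interrupt_type: InterruptType::Tag as i32,
        interrupt: Some(Interrupt::OperationTag(tag.to_string())),
        // user_context, client_type and the idempotency key are left at their defaults here.
        ..Default::default()
    }
}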
+pub mod interrupt_request { + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] + #[repr(i32)] + pub enum InterruptType { + Unspecified = 0, + /// Interrupt all running executions within the session with the provided session_id. + All = 1, + /// Interrupt all running executions within the session with the provided operation_tag. + Tag = 2, + /// Interrupt the running execution within the session with the provided operation_id. + OperationId = 3, + } + impl InterruptType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Unspecified => "INTERRUPT_TYPE_UNSPECIFIED", + Self::All => "INTERRUPT_TYPE_ALL", + Self::Tag => "INTERRUPT_TYPE_TAG", + Self::OperationId => "INTERRUPT_TYPE_OPERATION_ID", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "INTERRUPT_TYPE_UNSPECIFIED" => Some(Self::Unspecified), + "INTERRUPT_TYPE_ALL" => Some(Self::All), + "INTERRUPT_TYPE_TAG" => Some(Self::Tag), + "INTERRUPT_TYPE_OPERATION_ID" => Some(Self::OperationId), + _ => None, + } + } + } + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Interrupt { + /// if interrupt_tag == INTERRUPT_TYPE_TAG, interrupt operation with this tag. + #[prost(string, tag = "5")] + OperationTag(::prost::alloc::string::String), + /// if interrupt_tag == INTERRUPT_TYPE_OPERATION_ID, interrupt operation with this operation_id. + #[prost(string, tag = "6")] + OperationId(::prost::alloc::string::String), + } +} +/// Next ID: 4 +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct InterruptResponse { + /// Session id in which the interrupt was running. + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// Server-side generated idempotency key that the client can use to assert that the server side + /// session has not changed. + #[prost(string, tag = "3")] + pub server_side_session_id: ::prost::alloc::string::String, + /// Operation ids of the executions which were interrupted. + #[prost(string, repeated, tag = "2")] + pub interrupted_ids: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, +} +#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct ReattachOptions { + /// If true, the request can be reattached to using ReattachExecute. + /// ReattachExecute can be used either if the stream broke with a GRPC network error, + /// or if the server closed the stream without sending a response with StreamStatus.complete=true. + /// The server will keep a buffer of responses in case a response is lost, and + /// ReattachExecute needs to back-track. + /// + /// If false, the execution response stream will will not be reattachable, and all responses are + /// immediately released by the server after being sent. + #[prost(bool, tag = "1")] + pub reattachable: bool, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ReattachExecuteRequest { + /// (Required) + /// + /// The session_id of the request to reattach to. + /// This must be an id of existing session. + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// (Optional) + /// + /// Server-side generated idempotency key from the previous responses (if any). 
Server + /// can use this to validate that the server side session has not changed. + #[prost(string, optional, tag = "6")] + pub client_observed_server_side_session_id: + ::core::option::Option<::prost::alloc::string::String>, + /// (Required) User context + /// + /// user_context.user_id and session+id both identify a unique remote spark session on the + /// server side. + #[prost(message, optional, tag = "2")] + pub user_context: ::core::option::Option, + /// (Required) + /// Provide an id of the request to reattach to. + /// This must be an id of existing operation. + #[prost(string, tag = "3")] + pub operation_id: ::prost::alloc::string::String, + /// Provides optional information about the client sending the request. This field + /// can be used for language or version specific information and is only intended for + /// logging purposes and will not be interpreted by the server. + #[prost(string, optional, tag = "4")] + pub client_type: ::core::option::Option<::prost::alloc::string::String>, + /// (Optional) + /// Last already processed response id from the response stream. + /// After reattach, server will resume the response stream after that response. + /// If not specified, server will restart the stream from the start. + /// + /// Note: server controls the amount of responses that it buffers and it may drop responses, + /// that are far behind the latest returned response, so this can't be used to arbitrarily + /// scroll back the cursor. If the response is no longer available, this will result in an error. + #[prost(string, optional, tag = "5")] + pub last_response_id: ::core::option::Option<::prost::alloc::string::String>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ReleaseExecuteRequest { + /// (Required) + /// + /// The session_id of the request to reattach to. + /// This must be an id of existing session. + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// (Optional) + /// + /// Server-side generated idempotency key from the previous responses (if any). Server + /// can use this to validate that the server side session has not changed. + #[prost(string, optional, tag = "7")] + pub client_observed_server_side_session_id: + ::core::option::Option<::prost::alloc::string::String>, + /// (Required) User context + /// + /// user_context.user_id and session+id both identify a unique remote spark session on the + /// server side. + #[prost(message, optional, tag = "2")] + pub user_context: ::core::option::Option, + /// (Required) + /// Provide an id of the request to reattach to. + /// This must be an id of existing operation. + #[prost(string, tag = "3")] + pub operation_id: ::prost::alloc::string::String, + /// Provides optional information about the client sending the request. This field + /// can be used for language or version specific information and is only intended for + /// logging purposes and will not be interpreted by the server. + #[prost(string, optional, tag = "4")] + pub client_type: ::core::option::Option<::prost::alloc::string::String>, + #[prost(oneof = "release_execute_request::Release", tags = "5, 6")] + pub release: ::core::option::Option, +} +/// Nested message and enum types in `ReleaseExecuteRequest`. +pub mod release_execute_request { + /// Release and close operation completely. + /// This will also interrupt the query if it is running execution, and wait for it to be torn down. 
+ #[derive(Clone, Copy, PartialEq, ::prost::Message)] + pub struct ReleaseAll {} + /// Release all responses from the operation response stream up to and including + /// the response with the given by response_id. + /// While server determines by itself how much of a buffer of responses to keep, client providing + /// explicit release calls will help reduce resource consumption. + /// Noop if response_id not found in cached responses. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct ReleaseUntil { + #[prost(string, tag = "1")] + pub response_id: ::prost::alloc::string::String, + } + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Release { + #[prost(message, tag = "5")] + ReleaseAll(ReleaseAll), + #[prost(message, tag = "6")] + ReleaseUntil(ReleaseUntil), + } +} +/// Next ID: 4 +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ReleaseExecuteResponse { + /// Session id in which the release was running. + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// Server-side generated idempotency key that the client can use to assert that the server side + /// session has not changed. + #[prost(string, tag = "3")] + pub server_side_session_id: ::prost::alloc::string::String, + /// Operation id of the operation on which the release executed. + /// If the operation couldn't be found (because e.g. it was concurrently released), will be unset. + /// Otherwise, it will be equal to the operation_id from request. + #[prost(string, optional, tag = "2")] + pub operation_id: ::core::option::Option<::prost::alloc::string::String>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ReleaseSessionRequest { + /// (Required) + /// + /// The session_id of the request to reattach to. + /// This must be an id of existing session. + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// (Required) User context + /// + /// user_context.user_id and session+id both identify a unique remote spark session on the + /// server side. + #[prost(message, optional, tag = "2")] + pub user_context: ::core::option::Option, + /// Provides optional information about the client sending the request. This field + /// can be used for language or version specific information and is only intended for + /// logging purposes and will not be interpreted by the server. + #[prost(string, optional, tag = "3")] + pub client_type: ::core::option::Option<::prost::alloc::string::String>, +} +/// Next ID: 3 +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ReleaseSessionResponse { + /// Session id of the session on which the release executed. + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// Server-side generated idempotency key that the client can use to assert that the server side + /// session has not changed. + #[prost(string, tag = "2")] + pub server_side_session_id: ::prost::alloc::string::String, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FetchErrorDetailsRequest { + /// (Required) + /// The session_id specifies a Spark session for a user identified by user_context.user_id. + /// The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`. + #[prost(string, tag = "1")] + pub session_id: ::prost::alloc::string::String, + /// (Optional) + /// + /// Server-side generated idempotency key from the previous responses (if any). Server + /// can use this to validate that the server side session has not changed. 
+ #[prost(string, optional, tag = "5")] + pub client_observed_server_side_session_id: + ::core::option::Option<::prost::alloc::string::String>, + /// User context + #[prost(message, optional, tag = "2")] + pub user_context: ::core::option::Option, + /// (Required) + /// The id of the error. + #[prost(string, tag = "3")] + pub error_id: ::prost::alloc::string::String, + /// Provides optional information about the client sending the request. This field + /// can be used for language or version specific information and is only intended for + /// logging purposes and will not be interpreted by the server. + #[prost(string, optional, tag = "4")] + pub client_type: ::core::option::Option<::prost::alloc::string::String>, +} +/// Next ID: 5 +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FetchErrorDetailsResponse { + /// Server-side generated idempotency key that the client can use to assert that the server side + /// session has not changed. + #[prost(string, tag = "3")] + pub server_side_session_id: ::prost::alloc::string::String, + #[prost(string, tag = "4")] + pub session_id: ::prost::alloc::string::String, + /// The index of the root error in errors. The field will not be set if the error is not found. + #[prost(int32, optional, tag = "1")] + pub root_error_idx: ::core::option::Option, + /// A list of errors. + #[prost(message, repeated, tag = "2")] + pub errors: ::prost::alloc::vec::Vec, +} +/// Nested message and enum types in `FetchErrorDetailsResponse`. +pub mod fetch_error_details_response { + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct StackTraceElement { + /// The fully qualified name of the class containing the execution point. + #[prost(string, tag = "1")] + pub declaring_class: ::prost::alloc::string::String, + /// The name of the method containing the execution point. + #[prost(string, tag = "2")] + pub method_name: ::prost::alloc::string::String, + /// The name of the file containing the execution point. + #[prost(string, optional, tag = "3")] + pub file_name: ::core::option::Option<::prost::alloc::string::String>, + /// The line number of the source line containing the execution point. + #[prost(int32, tag = "4")] + pub line_number: i32, + } + /// QueryContext defines the schema for the query context of a SparkThrowable. + /// It helps users understand where the error occurs while executing queries. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct QueryContext { + #[prost(enumeration = "query_context::ContextType", tag = "10")] + pub context_type: i32, + /// The object type of the query which throws the exception. + /// If the exception is directly from the main query, it should be an empty string. + /// Otherwise, it should be the exact object type in upper case. For example, a "VIEW". + #[prost(string, tag = "1")] + pub object_type: ::prost::alloc::string::String, + /// The object name of the query which throws the exception. + /// If the exception is directly from the main query, it should be an empty string. + /// Otherwise, it should be the object name. For example, a view name "V1". + #[prost(string, tag = "2")] + pub object_name: ::prost::alloc::string::String, + /// The starting index in the query text which throws the exception. The index starts from 0. + #[prost(int32, tag = "3")] + pub start_index: i32, + /// The stopping index in the query which throws the exception. The index starts from 0. + #[prost(int32, tag = "4")] + pub stop_index: i32, + /// The corresponding fragment of the query which throws the exception. 
+ #[prost(string, tag = "5")] + pub fragment: ::prost::alloc::string::String, + /// The user code (call site of the API) that caused throwing the exception. + #[prost(string, tag = "6")] + pub call_site: ::prost::alloc::string::String, + /// Summary of the exception cause. + #[prost(string, tag = "7")] + pub summary: ::prost::alloc::string::String, + } + /// Nested message and enum types in `QueryContext`. + pub mod query_context { + /// The type of this query context. + #[derive( + Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, + )] + #[repr(i32)] + pub enum ContextType { + Sql = 0, + Dataframe = 1, + } + impl ContextType { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::Sql => "SQL", + Self::Dataframe => "DATAFRAME", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "SQL" => Some(Self::Sql), + "DATAFRAME" => Some(Self::Dataframe), + _ => None, + } + } + } + } + /// SparkThrowable defines the schema for SparkThrowable exceptions. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct SparkThrowable { + /// Succinct, human-readable, unique, and consistent representation of the error category. + #[prost(string, optional, tag = "1")] + pub error_class: ::core::option::Option<::prost::alloc::string::String>, + /// The message parameters for the error framework. + #[prost(map = "string, string", tag = "2")] + pub message_parameters: ::std::collections::HashMap< + ::prost::alloc::string::String, + ::prost::alloc::string::String, + >, + /// The query context of a SparkThrowable. + #[prost(message, repeated, tag = "3")] + pub query_contexts: ::prost::alloc::vec::Vec, + /// Portable error identifier across SQL engines + /// If null, error class or SQLSTATE is not set. + #[prost(string, optional, tag = "4")] + pub sql_state: ::core::option::Option<::prost::alloc::string::String>, + } + /// Error defines the schema for the representing exception. + #[derive(Clone, PartialEq, ::prost::Message)] + pub struct Error { + /// The fully qualified names of the exception class and its parent classes. + #[prost(string, repeated, tag = "1")] + pub error_type_hierarchy: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// The detailed message of the exception. + #[prost(string, tag = "2")] + pub message: ::prost::alloc::string::String, + /// The stackTrace of the exception. It will be set + /// if the SQLConf spark.sql.connect.serverStacktrace.enabled is true. + #[prost(message, repeated, tag = "3")] + pub stack_trace: ::prost::alloc::vec::Vec, + /// The index of the cause error in errors. + #[prost(int32, optional, tag = "4")] + pub cause_idx: ::core::option::Option, + /// The structured data of a SparkThrowable exception. + #[prost(message, optional, tag = "5")] + pub spark_throwable: ::core::option::Option, + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CheckpointCommandResult { + /// (Required) The logical plan checkpointed. + #[prost(message, optional, tag = "1")] + pub relation: ::core::option::Option, +} +/// Generated client implementations. 
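Editor's note: before the generated client module below, a hedged sketch of how the error hierarchy above can be consumed. Starting from root_error_idx, each Error points at its cause via cause_idx, so the chain can be flattened into a list of messages. The `spark_connect` module path is an assumption and not part of this diff.

use spark_connect::FetchErrorDetailsResponse;

// Collect the exception messages along the cause chain, starting at the root error.
// Assumes the server returned well-formed indices; a production client should bounds-check.
fn error_chain_messages(details: &FetchErrorDetailsResponse) -> Vec<String> {
    let mut messages = Vec::new();
    let mut next = details.root_error_idx;
    while let Some(idx) = next {
        let error = &details.errors[idx as usize];
        messages.push(error.message.clone());
        next = error.cause_idx;
    }
    messages
}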
+pub mod spark_connect_service_client { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value + )] + use tonic::codegen::{http::Uri, *}; + /// Main interface for the SparkConnect service. + #[derive(Debug, Clone)] + pub struct SparkConnectServiceClient { + inner: tonic::client::Grpc, + } + impl SparkConnectServiceClient { + /// Attempt to create a new client by connecting to a given endpoint. + pub async fn connect(dst: D) -> Result + where + D: TryInto, + D::Error: Into, + { + let conn = tonic::transport::Endpoint::new(dst)?.connect().await?; + Ok(Self::new(conn)) + } + } + impl SparkConnectServiceClient + where + T: tonic::client::GrpcService, + T::Error: Into, + T::ResponseBody: Body + std::marker::Send + 'static, + ::Error: Into + std::marker::Send, + { + pub fn new(inner: T) -> Self { + let inner = tonic::client::Grpc::new(inner); + Self { inner } + } + pub fn with_origin(inner: T, origin: Uri) -> Self { + let inner = tonic::client::Grpc::with_origin(inner, origin); + Self { inner } + } + pub fn with_interceptor( + inner: T, + interceptor: F, + ) -> SparkConnectServiceClient> + where + F: tonic::service::Interceptor, + T::ResponseBody: Default, + T: tonic::codegen::Service< + http::Request, + Response = http::Response< + >::ResponseBody, + >, + >, + >>::Error: + Into + std::marker::Send + std::marker::Sync, + { + SparkConnectServiceClient::new(InterceptedService::new(inner, interceptor)) + } + /// Compress requests with the given encoding. + /// + /// This requires the server to support it otherwise it might respond with an + /// error. + #[must_use] + pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.send_compressed(encoding); + self + } + /// Enable decompressing responses. + #[must_use] + pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self { + self.inner = self.inner.accept_compressed(encoding); + self + } + /// Limits the maximum size of a decoded message. + /// + /// Default: `4MB` + #[must_use] + pub fn max_decoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_decoding_message_size(limit); + self + } + /// Limits the maximum size of an encoded message. + /// + /// Default: `usize::MAX` + #[must_use] + pub fn max_encoding_message_size(mut self, limit: usize) -> Self { + self.inner = self.inner.max_encoding_message_size(limit); + self + } + /// Executes a request that contains the query and returns a stream of [[Response]]. + /// + /// It is guaranteed that there is at least one ARROW batch returned even if the result set is empty. + pub async fn execute_plan( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response>, + tonic::Status, + > { + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/spark.connect.SparkConnectService/ExecutePlan", + ); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new( + "spark.connect.SparkConnectService", + "ExecutePlan", + )); + self.inner.server_streaming(req, path, codec).await + } + /// Analyzes a query and returns a [[AnalyzeResponse]] containing metadata about the query. 
+ pub async fn analyze_plan( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result, tonic::Status> + { + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/spark.connect.SparkConnectService/AnalyzePlan", + ); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new( + "spark.connect.SparkConnectService", + "AnalyzePlan", + )); + self.inner.unary(req, path, codec).await + } + /// Update or fetch the configurations and returns a [[ConfigResponse]] containing the result. + pub async fn config( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result, tonic::Status> { + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = + http::uri::PathAndQuery::from_static("/spark.connect.SparkConnectService/Config"); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new( + "spark.connect.SparkConnectService", + "Config", + )); + self.inner.unary(req, path, codec).await + } + /// Add artifacts to the session and returns a [[AddArtifactsResponse]] containing metadata about + /// the added artifacts. + pub async fn add_artifacts( + &mut self, + request: impl tonic::IntoStreamingRequest, + ) -> std::result::Result, tonic::Status> + { + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/spark.connect.SparkConnectService/AddArtifacts", + ); + let mut req = request.into_streaming_request(); + req.extensions_mut().insert(GrpcMethod::new( + "spark.connect.SparkConnectService", + "AddArtifacts", + )); + self.inner.client_streaming(req, path, codec).await + } + /// Check statuses of artifacts in the session and returns them in a [[ArtifactStatusesResponse]] + pub async fn artifact_status( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result, tonic::Status> + { + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/spark.connect.SparkConnectService/ArtifactStatus", + ); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new( + "spark.connect.SparkConnectService", + "ArtifactStatus", + )); + self.inner.unary(req, path, codec).await + } + /// Interrupts running executions + pub async fn interrupt( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result, tonic::Status> { + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/spark.connect.SparkConnectService/Interrupt", + ); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new( + "spark.connect.SparkConnectService", + "Interrupt", + )); + self.inner.unary(req, path, codec).await + } + /// Reattach to an existing reattachable execution. + /// The ExecutePlan must have been started with ReattachOptions.reattachable=true. 
+ /// If the ExecutePlanResponse stream ends without a ResultComplete message, there is more to + /// continue. If there is a ResultComplete, the client should use ReleaseExecute with + pub async fn reattach_execute( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response>, + tonic::Status, + > { + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/spark.connect.SparkConnectService/ReattachExecute", + ); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new( + "spark.connect.SparkConnectService", + "ReattachExecute", + )); + self.inner.server_streaming(req, path, codec).await + } + /// Release an reattachable execution, or parts thereof. + /// The ExecutePlan must have been started with ReattachOptions.reattachable=true. + /// Non reattachable executions are released automatically and immediately after the ExecutePlan + /// RPC and ReleaseExecute may not be used. + pub async fn release_execute( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result, tonic::Status> + { + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/spark.connect.SparkConnectService/ReleaseExecute", + ); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new( + "spark.connect.SparkConnectService", + "ReleaseExecute", + )); + self.inner.unary(req, path, codec).await + } + /// Release a session. + /// All the executions in the session will be released. Any further requests for the session with + /// that session_id for the given user_id will fail. If the session didn't exist or was already + /// released, this is a noop. + pub async fn release_session( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result, tonic::Status> + { + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/spark.connect.SparkConnectService/ReleaseSession", + ); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new( + "spark.connect.SparkConnectService", + "ReleaseSession", + )); + self.inner.unary(req, path, codec).await + } + /// FetchErrorDetails retrieves the matched exception with details based on a provided error id. + pub async fn fetch_error_details( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result, tonic::Status> + { + self.inner.ready().await.map_err(|e| { + tonic::Status::unknown(format!("Service was not ready: {}", e.into())) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/spark.connect.SparkConnectService/FetchErrorDetails", + ); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new( + "spark.connect.SparkConnectService", + "FetchErrorDetails", + )); + self.inner.unary(req, path, codec).await + } + } +} +/// Generated server implementations. 
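Editor's note: to show how the client generated above is typically driven, a minimal sketch that opens a connection, submits a plan, and drains the server-streaming response, matching on the ResponseType variants defined earlier. The endpoint, the `spark_connect` module path, and the use of tokio are assumptions; none of them are part of this diff. The generated server implementations follow.

use spark_connect::execute_plan_response::ResponseType;
use spark_connect::spark_connect_service_client::SparkConnectServiceClient;
use spark_connect::ExecutePlanRequest;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Spark Connect servers conventionally listen on port 15002; adjust as needed.
    let mut client = SparkConnectServiceClient::connect("http://localhost:15002").await?;

    let request = ExecutePlanRequest {
        session_id: "00112233-4455-6677-8899-aabbccddeeff".to_string(),
        // plan, user_context, tags, etc. are elided; a real client must set the plan.
        ..Default::default()
    };

    // execute_plan is server-streaming: read messages until the stream is exhausted.
    let mut stream = client.execute_plan(request).await?.into_inner();
    while let Some(response) = stream.message().await? {
        match response.response_type {
            Some(ResponseType::ArrowBatch(batch)) => {
                println!("arrow batch: {} rows, {} bytes", batch.row_count, batch.data.len());
            }
            Some(ResponseType::ResultComplete(_)) => println!("result complete"),
            _ => {}
        }
    }
    Ok(())
}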
+pub mod spark_connect_service_server { + #![allow( + unused_variables, + dead_code, + missing_docs, + clippy::wildcard_imports, + clippy::let_unit_value + )] + use tonic::codegen::*; + /// Generated trait containing gRPC methods that should be implemented for use with SparkConnectServiceServer. + #[async_trait] + pub trait SparkConnectService: std::marker::Send + std::marker::Sync + 'static { + /// Server streaming response type for the ExecutePlan method. + type ExecutePlanStream: tonic::codegen::tokio_stream::Stream< + Item = std::result::Result, + > + std::marker::Send + + 'static; + /// Executes a request that contains the query and returns a stream of [[Response]]. + /// + /// It is guaranteed that there is at least one ARROW batch returned even if the result set is empty. + async fn execute_plan( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; + /// Analyzes a query and returns a [[AnalyzeResponse]] containing metadata about the query. + async fn analyze_plan( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; + /// Update or fetch the configurations and returns a [[ConfigResponse]] containing the result. + async fn config( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; + /// Add artifacts to the session and returns a [[AddArtifactsResponse]] containing metadata about + /// the added artifacts. + async fn add_artifacts( + &self, + request: tonic::Request>, + ) -> std::result::Result, tonic::Status>; + /// Check statuses of artifacts in the session and returns them in a [[ArtifactStatusesResponse]] + async fn artifact_status( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; + /// Interrupts running executions + async fn interrupt( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; + /// Server streaming response type for the ReattachExecute method. + type ReattachExecuteStream: tonic::codegen::tokio_stream::Stream< + Item = std::result::Result, + > + std::marker::Send + + 'static; + /// Reattach to an existing reattachable execution. + /// The ExecutePlan must have been started with ReattachOptions.reattachable=true. + /// If the ExecutePlanResponse stream ends without a ResultComplete message, there is more to + /// continue. If there is a ResultComplete, the client should use ReleaseExecute with + async fn reattach_execute( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; + /// Release an reattachable execution, or parts thereof. + /// The ExecutePlan must have been started with ReattachOptions.reattachable=true. + /// Non reattachable executions are released automatically and immediately after the ExecutePlan + /// RPC and ReleaseExecute may not be used. + async fn release_execute( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; + /// Release a session. + /// All the executions in the session will be released. Any further requests for the session with + /// that session_id for the given user_id will fail. If the session didn't exist or was already + /// released, this is a noop. + async fn release_session( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; + /// FetchErrorDetails retrieves the matched exception with details based on a provided error id. + async fn fetch_error_details( + &self, + request: tonic::Request, + ) -> std::result::Result, tonic::Status>; + } + /// Main interface for the SparkConnect service. 
+#[derive(Debug)]
+pub struct SparkConnectServiceServer<T> {
+    inner: Arc<T>,
+    accept_compression_encodings: EnabledCompressionEncodings,
+    send_compression_encodings: EnabledCompressionEncodings,
+    max_decoding_message_size: Option<usize>,
+    max_encoding_message_size: Option<usize>,
+}
+impl<T> SparkConnectServiceServer<T> {
+    pub fn new(inner: T) -> Self {
+        Self::from_arc(Arc::new(inner))
+    }
+    pub fn from_arc(inner: Arc<T>) -> Self {
+        Self {
+            inner,
+            accept_compression_encodings: Default::default(),
+            send_compression_encodings: Default::default(),
+            max_decoding_message_size: None,
+            max_encoding_message_size: None,
+        }
+    }
+    pub fn with_interceptor<F>(inner: T, interceptor: F) -> InterceptedService<Self, F>
+    where
+        F: tonic::service::Interceptor,
+    {
+        InterceptedService::new(Self::new(inner), interceptor)
+    }
+    /// Enable decompressing requests with the given encoding.
+    #[must_use]
+    pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self {
+        self.accept_compression_encodings.enable(encoding);
+        self
+    }
+    /// Compress responses with the given encoding, if the client supports it.
+    #[must_use]
+    pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self {
+        self.send_compression_encodings.enable(encoding);
+        self
+    }
+    /// Limits the maximum size of a decoded message.
+    ///
+    /// Default: `4MB`
+    #[must_use]
+    pub fn max_decoding_message_size(mut self, limit: usize) -> Self {
+        self.max_decoding_message_size = Some(limit);
+        self
+    }
+    /// Limits the maximum size of an encoded message.
+    ///
+    /// Default: `usize::MAX`
+    #[must_use]
+    pub fn max_encoding_message_size(mut self, limit: usize) -> Self {
+        self.max_encoding_message_size = Some(limit);
+        self
+    }
+}
+impl<T, B> tonic::codegen::Service<http::Request<B>> for SparkConnectServiceServer<T>
+where
+    T: SparkConnectService,
+    B: Body + std::marker::Send + 'static,
+    B::Error: Into<StdError> + std::marker::Send + 'static,
+{
+    type Response = http::Response<tonic::body::BoxBody>;
+    type Error = std::convert::Infallible;
+    type Future = BoxFuture<Self::Response, Self::Error>;
+    fn poll_ready(
+        &mut self,
+        _cx: &mut Context<'_>,
+    ) -> Poll<std::result::Result<(), Self::Error>> {
+        Poll::Ready(Ok(()))
+    }
+    fn call(&mut self, req: http::Request<B>) -> Self::Future {
+        match req.uri().path() {
+            "/spark.connect.SparkConnectService/ExecutePlan" => {
+                #[allow(non_camel_case_types)]
+                struct ExecutePlanSvc<T: SparkConnectService>(pub Arc<T>);
+                impl<T: SparkConnectService> tonic::server::ServerStreamingService<super::ExecutePlanRequest>
+                for ExecutePlanSvc<T> {
+                    type Response = super::ExecutePlanResponse;
+                    type ResponseStream = T::ExecutePlanStream;
+                    type Future = BoxFuture<tonic::Response<Self::ResponseStream>, tonic::Status>;
+                    fn call(&mut self, request: tonic::Request<super::ExecutePlanRequest>) -> Self::Future {
+                        let inner = Arc::clone(&self.0);
+                        let fut = async move {
+                            <T as SparkConnectService>::execute_plan(&inner, request).await
+                        };
+                        Box::pin(fut)
+                    }
+                }
+                let accept_compression_encodings = self.accept_compression_encodings;
+                let send_compression_encodings = self.send_compression_encodings;
+                let max_decoding_message_size = self.max_decoding_message_size;
+                let max_encoding_message_size = self.max_encoding_message_size;
+                let inner = self.inner.clone();
+                let fut = async move {
+                    let method = ExecutePlanSvc(inner);
+                    let codec = tonic::codec::ProstCodec::default();
+                    let mut grpc = tonic::server::Grpc::new(codec)
+                        .apply_compression_config(accept_compression_encodings, send_compression_encodings)
+                        .apply_max_message_size_config(max_decoding_message_size, max_encoding_message_size);
+                    let res = grpc.server_streaming(method, req).await;
+                    Ok(res)
+                };
+                Box::pin(fut)
+            }
+            "/spark.connect.SparkConnectService/AnalyzePlan" => {
+                #[allow(non_camel_case_types)]
+                struct AnalyzePlanSvc<T: SparkConnectService>(pub Arc<T>);
+                impl<T: SparkConnectService> tonic::server::UnaryService<super::AnalyzePlanRequest>
+                for AnalyzePlanSvc<T> {
+                    type Response = super::AnalyzePlanResponse;
+                    type Future = BoxFuture<tonic::Response<Self::Response>, tonic::Status>;
+                    fn call(&mut self, request: tonic::Request<super::AnalyzePlanRequest>) -> Self::Future {
+                        let inner = Arc::clone(&self.0);
+                        let fut = async move {
+                            <T as SparkConnectService>::analyze_plan(&inner, request).await
+                        };
+                        Box::pin(fut)
+                    }
+                }
+                let accept_compression_encodings = self.accept_compression_encodings;
+                let send_compression_encodings = self.send_compression_encodings;
+                let max_decoding_message_size = self.max_decoding_message_size;
+                let max_encoding_message_size = self.max_encoding_message_size;
+                let inner = self.inner.clone();
+                let fut = async move {
+                    let method = AnalyzePlanSvc(inner);
+                    let codec = tonic::codec::ProstCodec::default();
+                    let mut grpc = tonic::server::Grpc::new(codec)
+                        .apply_compression_config(accept_compression_encodings, send_compression_encodings)
+                        .apply_max_message_size_config(max_decoding_message_size, max_encoding_message_size);
+                    let res = grpc.unary(method, req).await;
+                    Ok(res)
+                };
+                Box::pin(fut)
+            }
+            "/spark.connect.SparkConnectService/Config" => {
+                #[allow(non_camel_case_types)]
+                struct ConfigSvc<T: SparkConnectService>(pub Arc<T>);
+                impl<T: SparkConnectService> tonic::server::UnaryService<super::ConfigRequest>
+                for ConfigSvc<T> {
+                    type Response = super::ConfigResponse;
+                    type Future = BoxFuture<tonic::Response<Self::Response>, tonic::Status>;
+                    fn call(&mut self, request: tonic::Request<super::ConfigRequest>) -> Self::Future {
+                        let inner = Arc::clone(&self.0);
+                        let fut = async move {
+                            <T as SparkConnectService>::config(&inner, request).await
+                        };
+                        Box::pin(fut)
+                    }
+                }
+                let accept_compression_encodings = self.accept_compression_encodings;
+                let send_compression_encodings = self.send_compression_encodings;
+                let max_decoding_message_size = self.max_decoding_message_size;
+                let max_encoding_message_size = self.max_encoding_message_size;
+                let inner = self.inner.clone();
+                let fut = async move {
+                    let method = ConfigSvc(inner);
+                    let codec = tonic::codec::ProstCodec::default();
+                    let mut grpc = tonic::server::Grpc::new(codec)
+                        .apply_compression_config(accept_compression_encodings, send_compression_encodings)
+                        .apply_max_message_size_config(max_decoding_message_size, max_encoding_message_size);
+                    let res = grpc.unary(method, req).await;
+                    Ok(res)
+                };
+                Box::pin(fut)
+            }
+            "/spark.connect.SparkConnectService/AddArtifacts" => {
+                #[allow(non_camel_case_types)]
+                struct AddArtifactsSvc<T: SparkConnectService>(pub Arc<T>);
+                impl<T: SparkConnectService> tonic::server::ClientStreamingService<super::AddArtifactsRequest>
+                for AddArtifactsSvc<T> {
+                    type Response = super::AddArtifactsResponse;
+                    type Future = BoxFuture<tonic::Response<Self::Response>, tonic::Status>;
+                    fn call(&mut self, request: tonic::Request<tonic::Streaming<super::AddArtifactsRequest>>) -> Self::Future {
+                        let inner = Arc::clone(&self.0);
+                        let fut = async move {
+                            <T as SparkConnectService>::add_artifacts(&inner, request).await
+                        };
+                        Box::pin(fut)
+                    }
+                }
+                let accept_compression_encodings = self.accept_compression_encodings;
+                let send_compression_encodings = self.send_compression_encodings;
+                let max_decoding_message_size = self.max_decoding_message_size;
+                let max_encoding_message_size = self.max_encoding_message_size;
+                let inner = self.inner.clone();
+                let fut = async move {
+                    let method = AddArtifactsSvc(inner);
+                    let codec = tonic::codec::ProstCodec::default();
+                    let mut grpc = tonic::server::Grpc::new(codec)
+                        .apply_compression_config(accept_compression_encodings, send_compression_encodings)
+                        .apply_max_message_size_config(max_decoding_message_size, max_encoding_message_size);
+                    let res = grpc.client_streaming(method, req).await;
+                    Ok(res)
+                };
+                Box::pin(fut)
+            }
+            "/spark.connect.SparkConnectService/ArtifactStatus" => {
+                #[allow(non_camel_case_types)]
+                struct ArtifactStatusSvc<T: SparkConnectService>(pub Arc<T>);
+                impl<T: SparkConnectService> tonic::server::UnaryService<super::ArtifactStatusesRequest>
+                for ArtifactStatusSvc<T> {
+                    type Response = super::ArtifactStatusesResponse;
+                    type Future = BoxFuture<tonic::Response<Self::Response>, tonic::Status>;
+                    fn call(&mut self, request: tonic::Request<super::ArtifactStatusesRequest>) -> Self::Future {
+                        let inner = Arc::clone(&self.0);
+                        let fut = async move {
+                            <T as SparkConnectService>::artifact_status(&inner, request).await
+                        };
+                        Box::pin(fut)
+                    }
+                }
+                let accept_compression_encodings = self.accept_compression_encodings;
+                let send_compression_encodings = self.send_compression_encodings;
+                let max_decoding_message_size = self.max_decoding_message_size;
+                let max_encoding_message_size = self.max_encoding_message_size;
+                let inner = self.inner.clone();
+                let fut = async move {
+                    let method = ArtifactStatusSvc(inner);
+                    let codec = tonic::codec::ProstCodec::default();
+                    let mut grpc = tonic::server::Grpc::new(codec)
+                        .apply_compression_config(accept_compression_encodings, send_compression_encodings)
+                        .apply_max_message_size_config(max_decoding_message_size, max_encoding_message_size);
+                    let res = grpc.unary(method, req).await;
+                    Ok(res)
+                };
+                Box::pin(fut)
+            }
+            "/spark.connect.SparkConnectService/Interrupt" => {
+                #[allow(non_camel_case_types)]
+                struct InterruptSvc<T: SparkConnectService>(pub Arc<T>);
+                impl<T: SparkConnectService> tonic::server::UnaryService<super::InterruptRequest>
+                for InterruptSvc<T> {
+                    type Response = super::InterruptResponse;
+                    type Future = BoxFuture<tonic::Response<Self::Response>, tonic::Status>;
+                    fn call(&mut self, request: tonic::Request<super::InterruptRequest>) -> Self::Future {
+                        let inner = Arc::clone(&self.0);
+                        let fut = async move {
+                            <T as SparkConnectService>::interrupt(&inner, request).await
+                        };
+                        Box::pin(fut)
+                    }
+                }
+                let accept_compression_encodings = self.accept_compression_encodings;
+                let send_compression_encodings = self.send_compression_encodings;
+                let max_decoding_message_size = self.max_decoding_message_size;
+                let max_encoding_message_size = self.max_encoding_message_size;
+                let inner = self.inner.clone();
+                let fut = async move {
+                    let method = InterruptSvc(inner);
+                    let codec = tonic::codec::ProstCodec::default();
+                    let mut grpc = tonic::server::Grpc::new(codec)
+                        .apply_compression_config(accept_compression_encodings, send_compression_encodings)
+                        .apply_max_message_size_config(max_decoding_message_size, max_encoding_message_size);
+                    let res = grpc.unary(method, req).await;
+                    Ok(res)
+                };
+                Box::pin(fut)
+            }
+            "/spark.connect.SparkConnectService/ReattachExecute" => {
+                #[allow(non_camel_case_types)]
+                struct ReattachExecuteSvc<T: SparkConnectService>(pub Arc<T>);
+                impl<T: SparkConnectService> tonic::server::ServerStreamingService<super::ReattachExecuteRequest>
+                for ReattachExecuteSvc<T> {
+                    type Response = super::ExecutePlanResponse;
+                    type ResponseStream = T::ReattachExecuteStream;
+                    type Future = BoxFuture<tonic::Response<Self::ResponseStream>, tonic::Status>;
+                    fn call(&mut self, request: tonic::Request<super::ReattachExecuteRequest>) -> Self::Future {
+                        let inner = Arc::clone(&self.0);
+                        let fut = async move {
+                            <T as SparkConnectService>::reattach_execute(&inner, request).await
+                        };
+                        Box::pin(fut)
+                    }
+                }
+                let accept_compression_encodings = self.accept_compression_encodings;
+                let send_compression_encodings = self.send_compression_encodings;
+                let max_decoding_message_size = self.max_decoding_message_size;
+                let max_encoding_message_size = self.max_encoding_message_size;
+                let inner = self.inner.clone();
+                let fut = async move {
+                    let method = ReattachExecuteSvc(inner);
+                    let codec = tonic::codec::ProstCodec::default();
+                    let mut grpc = tonic::server::Grpc::new(codec)
+                        .apply_compression_config(accept_compression_encodings, send_compression_encodings)
+                        .apply_max_message_size_config(max_decoding_message_size, max_encoding_message_size);
+                    let res = grpc.server_streaming(method, req).await;
+                    Ok(res)
+                };
+                Box::pin(fut)
+            }
+            "/spark.connect.SparkConnectService/ReleaseExecute" => {
+                #[allow(non_camel_case_types)]
+                struct ReleaseExecuteSvc<T: SparkConnectService>(pub Arc<T>);
+                impl<T: SparkConnectService> tonic::server::UnaryService<super::ReleaseExecuteRequest>
+                for ReleaseExecuteSvc<T> {
+                    type Response = super::ReleaseExecuteResponse;
+                    type Future = BoxFuture<tonic::Response<Self::Response>, tonic::Status>;
+                    fn call(&mut self, request: tonic::Request<super::ReleaseExecuteRequest>) -> Self::Future {
+                        let inner = Arc::clone(&self.0);
+                        let fut = async move {
+                            <T as SparkConnectService>::release_execute(&inner, request).await
+                        };
+                        Box::pin(fut)
+                    }
+                }
+                let accept_compression_encodings = self.accept_compression_encodings;
+                let send_compression_encodings = self.send_compression_encodings;
+                let max_decoding_message_size = self.max_decoding_message_size;
+                let max_encoding_message_size = self.max_encoding_message_size;
+                let inner = self.inner.clone();
+                let fut = async move {
+                    let method = ReleaseExecuteSvc(inner);
+                    let codec = tonic::codec::ProstCodec::default();
+                    let mut grpc = tonic::server::Grpc::new(codec)
+                        .apply_compression_config(accept_compression_encodings, send_compression_encodings)
+                        .apply_max_message_size_config(max_decoding_message_size, max_encoding_message_size);
+                    let res = grpc.unary(method, req).await;
+                    Ok(res)
+                };
+                Box::pin(fut)
+            }
+            "/spark.connect.SparkConnectService/ReleaseSession" => {
+                #[allow(non_camel_case_types)]
+                struct ReleaseSessionSvc<T: SparkConnectService>(pub Arc<T>);
+                impl<T: SparkConnectService> tonic::server::UnaryService<super::ReleaseSessionRequest>
+                for ReleaseSessionSvc<T> {
+                    type Response = super::ReleaseSessionResponse;
+                    type Future = BoxFuture<tonic::Response<Self::Response>, tonic::Status>;
+                    fn call(&mut self, request: tonic::Request<super::ReleaseSessionRequest>) -> Self::Future {
+                        let inner = Arc::clone(&self.0);
+                        let fut = async move {
+                            <T as SparkConnectService>::release_session(&inner, request).await
+                        };
+                        Box::pin(fut)
+                    }
+                }
+                let accept_compression_encodings = self.accept_compression_encodings;
+                let send_compression_encodings = self.send_compression_encodings;
+                let max_decoding_message_size = self.max_decoding_message_size;
+                let max_encoding_message_size = self.max_encoding_message_size;
+                let inner = self.inner.clone();
+                let fut = async move {
+                    let method = ReleaseSessionSvc(inner);
+                    let codec = tonic::codec::ProstCodec::default();
+                    let mut grpc = tonic::server::Grpc::new(codec)
+                        .apply_compression_config(accept_compression_encodings, send_compression_encodings)
+                        .apply_max_message_size_config(max_decoding_message_size, max_encoding_message_size);
+                    let res = grpc.unary(method, req).await;
+                    Ok(res)
+                };
+                Box::pin(fut)
+            }
+            "/spark.connect.SparkConnectService/FetchErrorDetails" => {
+                #[allow(non_camel_case_types)]
+                struct FetchErrorDetailsSvc<T: SparkConnectService>(pub Arc<T>);
+                impl<T: SparkConnectService> tonic::server::UnaryService<super::FetchErrorDetailsRequest>
+                for FetchErrorDetailsSvc<T> {
+                    type Response = super::FetchErrorDetailsResponse;
+                    type Future = BoxFuture<tonic::Response<Self::Response>, tonic::Status>;
+                    fn call(&mut self, request: tonic::Request<super::FetchErrorDetailsRequest>) -> Self::Future {
+                        let inner = Arc::clone(&self.0);
+                        let fut = async move {
+                            <T as SparkConnectService>::fetch_error_details(&inner, request).await
+                        };
+                        Box::pin(fut)
+                    }
+                }
+                let accept_compression_encodings = self.accept_compression_encodings;
+                let send_compression_encodings = self.send_compression_encodings;
+                let max_decoding_message_size = self.max_decoding_message_size;
+                let max_encoding_message_size = self.max_encoding_message_size;
+                let inner = self.inner.clone();
+                let fut = async move {
+                    let method = FetchErrorDetailsSvc(inner);
+                    let codec = tonic::codec::ProstCodec::default();
+                    let mut grpc = tonic::server::Grpc::new(codec)
+                        .apply_compression_config(accept_compression_encodings, send_compression_encodings)
+                        .apply_max_message_size_config(max_decoding_message_size, max_encoding_message_size);
+                    let res = grpc.unary(method, req).await;
+                    Ok(res)
+                };
+                Box::pin(fut)
+            }
+            _ => Box::pin(async move {
+                let mut response = http::Response::new(empty_body());
+                let headers = response.headers_mut();
+                headers.insert(
+                    tonic::Status::GRPC_STATUS,
+                    (tonic::Code::Unimplemented as i32).into(),
+                );
+                headers.insert(
+                    http::header::CONTENT_TYPE,
+                    tonic::metadata::GRPC_CONTENT_TYPE,
+                );
+                Ok(response)
+            }),
+        }
+    }
+}
+impl<T> Clone for SparkConnectServiceServer<T> {
+    fn clone(&self) -> Self {
+        let inner = self.inner.clone();
+        Self {
+            inner,
+            accept_compression_encodings: self.accept_compression_encodings,
+            send_compression_encodings: self.send_compression_encodings,
+            max_decoding_message_size: self.max_decoding_message_size,
+            max_encoding_message_size: self.max_encoding_message_size,
+        }
+    }
+}
+/// Generated gRPC service name
+pub const SERVICE_NAME: &str = "spark.connect.SparkConnectService";
+impl<T> tonic::server::NamedService for SparkConnectServiceServer<T> {
+    const NAME: &'static str = SERVICE_NAME;
+}
+}
diff --git a/src/scripts/spark-connect-generation/Cargo.lock b/src/scripts/spark-connect-generation/Cargo.lock
new file mode 100644
index 0000000000..39c219603a
--- /dev/null
+++ b/src/scripts/spark-connect-generation/Cargo.lock
@@ -0,0 +1,403 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.93"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775"
+
+[[package]]
+name = "bitflags"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
+
+[[package]]
+name = "bytes"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "either"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
+
+[[package]]
+name = "equivalent"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
+
+[[package]]
+name = "errno"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
+dependencies = [
+ "libc",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "fastrand"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = 
"e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "hashbrown" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "indexmap" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "libc" +version = "0.2.161" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "multimap" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "prettyplease" +version = "0.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" +dependencies = [ + "bytes", + "heck", + "itertools", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "regex", + "syn", + "tempfile", +] + +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustix" +version = "0.38.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "375116bee2be9ed569afe2154ea6a99dfdffd257f533f187498c2a8f5feaf4ee" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "spark-connect-script" +version = "0.0.1" +dependencies = [ + "tonic-build", +] + +[[package]] +name = "syn" +version = "2.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +dependencies = [ + "cfg-if", + "fastrand", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "tonic-build" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" +dependencies = [ + "prettyplease", + "proc-macro2", + "prost-build", + "prost-types", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = 
"windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/src/scripts/spark-connect-generation/Cargo.toml b/src/scripts/spark-connect-generation/Cargo.toml new file mode 100644 index 0000000000..e2af3f89a7 --- /dev/null +++ b/src/scripts/spark-connect-generation/Cargo.toml @@ -0,0 +1,12 @@ +[build-dependencies] +tonic-build = {version = "0.12.3"} + +[package] +edition = "2021" +name = "spark-connect-generation-script" +version = "0.0.1" + +[package.metadata.cargo-machete] +ignored = ["tonic-build"] + +[workspace] diff --git a/src/scripts/spark-connect-generation/build.rs b/src/scripts/spark-connect-generation/build.rs new file mode 100644 index 0000000000..889286f9af --- /dev/null +++ b/src/scripts/spark-connect-generation/build.rs @@ -0,0 +1,28 @@ +fn main() -> std::io::Result<()> { + let mut config = tonic_build::Config::new(); + + // todo: having issues with prost_wkt_types on Windows + // config + // .type_attribute(".", "#[derive(serde::Serialize)]") + // .extern_path(".google.protobuf.Any", "::prost_wkt_types::Any") + // .extern_path(".google.protobuf.Timestamp", "::prost_wkt_types::Timestamp") + // .extern_path(".google.protobuf.Value", "::prost_wkt_types::Value"); + + tonic_build::configure() + 
.build_server(true) + .compile_protos_with_config( + config, + &[ + "proto/spark/connect/base.proto", + "proto/spark/connect/catalog.proto", + "proto/spark/connect/commands.proto", + "proto/spark/connect/common.proto", + "proto/spark/connect/expressions.proto", + "proto/spark/connect/relations.proto", + "proto/spark/connect/types.proto", + ], + &["proto"], + )?; + + Ok(()) +} diff --git a/src/scripts/spark-connect-generation/proto/spark/connect/base.proto b/src/scripts/spark-connect-generation/proto/spark/connect/base.proto new file mode 100644 index 0000000000..e3c84ddd5e --- /dev/null +++ b/src/scripts/spark-connect-generation/proto/spark/connect/base.proto @@ -0,0 +1,1103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = 'proto3'; + +package spark.connect; + +import "google/protobuf/any.proto"; +import "spark/connect/commands.proto"; +import "spark/connect/common.proto"; +import "spark/connect/expressions.proto"; +import "spark/connect/relations.proto"; +import "spark/connect/types.proto"; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.proto"; +option go_package = "internal/generated"; + +// A [[Plan]] is the structure that carries the runtime information for the execution from the +// client to the server. A [[Plan]] can either be of the type [[Relation]] which is a reference +// to the underlying logical plan or it can be of the [[Command]] type that is used to execute +// commands on the server. +message Plan { + oneof op_type { + Relation root = 1; + Command command = 2; + } +} + + + +// User Context is used to refer to one particular user session that is executing +// queries in the backend. +message UserContext { + string user_id = 1; + string user_name = 2; + + // To extend the existing user context message that is used to identify incoming requests, + // Spark Connect leverages the Any protobuf type that can be used to inject arbitrary other + // messages into this message. Extensions are stored as a `repeated` type to be able to + // handle multiple active extensions. + repeated google.protobuf.Any extensions = 999; +} + +// Request to perform plan analyze, optionally to explain the plan. +message AnalyzePlanRequest { + // (Required) + // + // The session_id specifies a spark session for a user id (which is specified + // by user_context.user_id). The session_id is set by the client to be able to + // collate streaming responses from different queries within the dedicated session. + // The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + string session_id = 1; + + // (Optional) + // + // Server-side generated idempotency key from the previous responses (if any). Server + // can use this to validate that the server side session has not changed. 
+ optional string client_observed_server_side_session_id = 17; + + // (Required) User context + UserContext user_context = 2; + + // Provides optional information about the client sending the request. This field + // can be used for language or version specific information and is only intended for + // logging purposes and will not be interpreted by the server. + optional string client_type = 3; + + oneof analyze { + Schema schema = 4; + Explain explain = 5; + TreeString tree_string = 6; + IsLocal is_local = 7; + IsStreaming is_streaming = 8; + InputFiles input_files = 9; + SparkVersion spark_version = 10; + DDLParse ddl_parse = 11; + SameSemantics same_semantics = 12; + SemanticHash semantic_hash = 13; + Persist persist = 14; + Unpersist unpersist = 15; + GetStorageLevel get_storage_level = 16; + } + + message Schema { + // (Required) The logical plan to be analyzed. + Plan plan = 1; + } + + // Explains the input plan based on a configurable mode. + message Explain { + // (Required) The logical plan to be analyzed. + Plan plan = 1; + + // (Required) For analyzePlan rpc calls, configure the mode to explain plan in strings. + ExplainMode explain_mode = 2; + + // Plan explanation mode. + enum ExplainMode { + EXPLAIN_MODE_UNSPECIFIED = 0; + + // Generates only physical plan. + EXPLAIN_MODE_SIMPLE = 1; + + // Generates parsed logical plan, analyzed logical plan, optimized logical plan and physical plan. + // Parsed Logical plan is a unresolved plan that extracted from the query. Analyzed logical plans + // transforms which translates unresolvedAttribute and unresolvedRelation into fully typed objects. + // The optimized logical plan transforms through a set of optimization rules, resulting in the + // physical plan. + EXPLAIN_MODE_EXTENDED = 2; + + // Generates code for the statement, if any and a physical plan. + EXPLAIN_MODE_CODEGEN = 3; + + // If plan node statistics are available, generates a logical plan and also the statistics. + EXPLAIN_MODE_COST = 4; + + // Generates a physical plan outline and also node details. + EXPLAIN_MODE_FORMATTED = 5; + } + } + + message TreeString { + // (Required) The logical plan to be analyzed. + Plan plan = 1; + + // (Optional) Max level of the schema. + optional int32 level = 2; + } + + message IsLocal { + // (Required) The logical plan to be analyzed. + Plan plan = 1; + } + + message IsStreaming { + // (Required) The logical plan to be analyzed. + Plan plan = 1; + } + + message InputFiles { + // (Required) The logical plan to be analyzed. + Plan plan = 1; + } + + message SparkVersion { } + + message DDLParse { + // (Required) The DDL formatted string to be parsed. + string ddl_string = 1; + } + + + // Returns `true` when the logical query plans are equal and therefore return same results. + message SameSemantics { + // (Required) The plan to be compared. + Plan target_plan = 1; + + // (Required) The other plan to be compared. + Plan other_plan = 2; + } + + message SemanticHash { + // (Required) The logical plan to get a hashCode. + Plan plan = 1; + } + + message Persist { + // (Required) The logical plan to persist. + Relation relation = 1; + + // (Optional) The storage level. + optional StorageLevel storage_level = 2; + } + + message Unpersist { + // (Required) The logical plan to unpersist. + Relation relation = 1; + + // (Optional) Whether to block until all blocks are deleted. + optional bool blocking = 2; + } + + message GetStorageLevel { + // (Required) The logical plan to get the storage level. 
+ Relation relation = 1; + } +} + +// Response to performing analysis of the query. Contains relevant metadata to be able to +// reason about the performance. +// Next ID: 16 +message AnalyzePlanResponse { + string session_id = 1; + // Server-side generated idempotency key that the client can use to assert that the server side + // session has not changed. + string server_side_session_id = 15; + + oneof result { + Schema schema = 2; + Explain explain = 3; + TreeString tree_string = 4; + IsLocal is_local = 5; + IsStreaming is_streaming = 6; + InputFiles input_files = 7; + SparkVersion spark_version = 8; + DDLParse ddl_parse = 9; + SameSemantics same_semantics = 10; + SemanticHash semantic_hash = 11; + Persist persist = 12; + Unpersist unpersist = 13; + GetStorageLevel get_storage_level = 14; + } + + message Schema { + DataType schema = 1; + } + + message Explain { + string explain_string = 1; + } + + message TreeString { + string tree_string = 1; + } + + message IsLocal { + bool is_local = 1; + } + + message IsStreaming { + bool is_streaming = 1; + } + + message InputFiles { + // A best-effort snapshot of the files that compose this Dataset + repeated string files = 1; + } + + message SparkVersion { + string version = 1; + } + + message DDLParse { + DataType parsed = 1; + } + + message SameSemantics { + bool result = 1; + } + + message SemanticHash { + int32 result = 1; + } + + message Persist { } + + message Unpersist { } + + message GetStorageLevel { + // (Required) The StorageLevel as a result of get_storage_level request. + StorageLevel storage_level = 1; + } +} + +// A request to be executed by the service. +message ExecutePlanRequest { + // (Required) + // + // The session_id specifies a spark session for a user id (which is specified + // by user_context.user_id). The session_id is set by the client to be able to + // collate streaming responses from different queries within the dedicated session. + // The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + string session_id = 1; + + // (Optional) + // + // Server-side generated idempotency key from the previous responses (if any). Server + // can use this to validate that the server side session has not changed. + optional string client_observed_server_side_session_id = 8; + + // (Required) User context + // + // user_context.user_id and session+id both identify a unique remote spark session on the + // server side. + UserContext user_context = 2; + + // (Optional) + // Provide an id for this request. If not provided, it will be generated by the server. + // It is returned in every ExecutePlanResponse.operation_id of the ExecutePlan response stream. + // The id must be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + optional string operation_id = 6; + + // (Required) The logical plan to be executed / analyzed. + Plan plan = 3; + + // Provides optional information about the client sending the request. This field + // can be used for language or version specific information and is only intended for + // logging purposes and will not be interpreted by the server. + optional string client_type = 4; + + // Repeated element for options that can be passed to the request. This element is currently + // unused but allows to pass in an extension value used for arbitrary options. 
+ repeated RequestOption request_options = 5; + + message RequestOption { + oneof request_option { + ReattachOptions reattach_options = 1; + // Extension type for request options + google.protobuf.Any extension = 999; + } + } + + // Tags to tag the given execution with. + // Tags cannot contain ',' character and cannot be empty strings. + // Used by Interrupt with interrupt.tag. + repeated string tags = 7; +} + +// The response of a query, can be one or more for each request. Responses belonging to the +// same input query, carry the same `session_id`. +// Next ID: 17 +message ExecutePlanResponse { + string session_id = 1; + // Server-side generated idempotency key that the client can use to assert that the server side + // session has not changed. + string server_side_session_id = 15; + + // Identifies the ExecutePlan execution. + // If set by the client in ExecutePlanRequest.operationId, that value is returned. + // Otherwise generated by the server. + // It is an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + string operation_id = 12; + + // Identified the response in the stream. + // The id is an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + string response_id = 13; + + // Union type for the different response messages. + oneof response_type { + ArrowBatch arrow_batch = 2; + + // Special case for executing SQL commands. + SqlCommandResult sql_command_result = 5; + + // Response for a streaming query. + WriteStreamOperationStartResult write_stream_operation_start_result = 8; + + // Response for commands on a streaming query. + StreamingQueryCommandResult streaming_query_command_result = 9; + + // Response for 'SparkContext.resources'. + GetResourcesCommandResult get_resources_command_result = 10; + + // Response for commands on the streaming query manager. + StreamingQueryManagerCommandResult streaming_query_manager_command_result = 11; + + // Response for commands on the client side streaming query listener. + StreamingQueryListenerEventsResult streaming_query_listener_events_result = 16; + + // Response type informing if the stream is complete in reattachable execution. + ResultComplete result_complete = 14; + + // Response for command that creates ResourceProfile. + CreateResourceProfileCommandResult create_resource_profile_command_result = 17; + + // (Optional) Intermediate query progress reports. + ExecutionProgress execution_progress = 18; + + // Response for command that checkpoints a DataFrame. + CheckpointCommandResult checkpoint_command_result = 19; + + // Support arbitrary result objects. + google.protobuf.Any extension = 999; + } + + // Metrics for the query execution. Typically, this field is only present in the last + // batch of results and then represent the overall state of the query execution. + Metrics metrics = 4; + + // The metrics observed during the execution of the query plan. + repeated ObservedMetrics observed_metrics = 6; + + // (Optional) The Spark schema. This field is available when `collect` is called. + DataType schema = 7; + + // A SQL command returns an opaque Relation that can be directly used as input for the next + // call. + message SqlCommandResult { + Relation relation = 1; + } + + // Batch results of metrics. + message ArrowBatch { + // Count rows in `data`. Must match the number of rows inside `data`. + int64 row_count = 1; + // Serialized Arrow data. + bytes data = 2; + + // If set, row offset of the start of this ArrowBatch in execution results. 
+ optional int64 start_offset = 3; + } + + message Metrics { + + repeated MetricObject metrics = 1; + + message MetricObject { + string name = 1; + int64 plan_id = 2; + int64 parent = 3; + map execution_metrics = 4; + } + + message MetricValue { + string name = 1; + int64 value = 2; + string metric_type = 3; + } + } + + message ObservedMetrics { + string name = 1; + repeated Expression.Literal values = 2; + repeated string keys = 3; + int64 plan_id = 4; + } + + message ResultComplete { + // If present, in a reattachable execution this means that after server sends onComplete, + // the execution is complete. If the server sends onComplete without sending a ResultComplete, + // it means that there is more, and the client should use ReattachExecute RPC to continue. + } + + // This message is used to communicate progress about the query progress during the execution. + message ExecutionProgress { + // Captures the progress of each individual stage. + repeated StageInfo stages = 1; + + // Captures the currently in progress tasks. + int64 num_inflight_tasks = 2; + + message StageInfo { + int64 stage_id = 1; + int64 num_tasks = 2; + int64 num_completed_tasks = 3; + int64 input_bytes_read = 4; + bool done = 5; + } + } +} + +// The key-value pair for the config request and response. +message KeyValue { + // (Required) The key. + string key = 1; + // (Optional) The value. + optional string value = 2; +} + +// Request to update or fetch the configurations. +message ConfigRequest { + // (Required) + // + // The session_id specifies a spark session for a user id (which is specified + // by user_context.user_id). The session_id is set by the client to be able to + // collate streaming responses from different queries within the dedicated session. + // The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + string session_id = 1; + + // (Optional) + // + // Server-side generated idempotency key from the previous responses (if any). Server + // can use this to validate that the server side session has not changed. + optional string client_observed_server_side_session_id = 8; + + // (Required) User context + UserContext user_context = 2; + + // (Required) The operation for the config. + Operation operation = 3; + + // Provides optional information about the client sending the request. This field + // can be used for language or version specific information and is only intended for + // logging purposes and will not be interpreted by the server. + optional string client_type = 4; + + message Operation { + oneof op_type { + Set set = 1; + Get get = 2; + GetWithDefault get_with_default = 3; + GetOption get_option = 4; + GetAll get_all = 5; + Unset unset = 6; + IsModifiable is_modifiable = 7; + } + } + + message Set { + // (Required) The config key-value pairs to set. + repeated KeyValue pairs = 1; + } + + message Get { + // (Required) The config keys to get. + repeated string keys = 1; + } + + message GetWithDefault { + // (Required) The config key-value pairs to get. The value will be used as the default value. + repeated KeyValue pairs = 1; + } + + message GetOption { + // (Required) The config keys to get optionally. + repeated string keys = 1; + } + + message GetAll { + // (Optional) The prefix of the config key to get. + optional string prefix = 1; + } + + message Unset { + // (Required) The config keys to unset. + repeated string keys = 1; + } + + message IsModifiable { + // (Required) The config keys to check the config is modifiable. 
+ repeated string keys = 1; + } +} + +// Response to the config request. +// Next ID: 5 +message ConfigResponse { + string session_id = 1; + // Server-side generated idempotency key that the client can use to assert that the server side + // session has not changed. + string server_side_session_id = 4; + + // (Optional) The result key-value pairs. + // + // Available when the operation is 'Get', 'GetWithDefault', 'GetOption', 'GetAll'. + // Also available for the operation 'IsModifiable' with boolean string "true" and "false". + repeated KeyValue pairs = 2; + + // (Optional) + // + // Warning messages for deprecated or unsupported configurations. + repeated string warnings = 3; +} + +// Request to transfer client-local artifacts. +message AddArtifactsRequest { + + // (Required) + // + // The session_id specifies a spark session for a user id (which is specified + // by user_context.user_id). The session_id is set by the client to be able to + // collate streaming responses from different queries within the dedicated session. + // The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + string session_id = 1; + + // User context + UserContext user_context = 2; + + // (Optional) + // + // Server-side generated idempotency key from the previous responses (if any). Server + // can use this to validate that the server side session has not changed. + optional string client_observed_server_side_session_id = 7; + + // Provides optional information about the client sending the request. This field + // can be used for language or version specific information and is only intended for + // logging purposes and will not be interpreted by the server. + optional string client_type = 6; + + // A chunk of an Artifact. + message ArtifactChunk { + // Data chunk. + bytes data = 1; + // CRC to allow server to verify integrity of the chunk. + int64 crc = 2; + } + + // An artifact that is contained in a single `ArtifactChunk`. + // Generally, this message represents tiny artifacts such as REPL-generated class files. + message SingleChunkArtifact { + // The name of the artifact is expected in the form of a "Relative Path" that is made up of a + // sequence of directories and the final file element. + // Examples of "Relative Path"s: "jars/test.jar", "classes/xyz.class", "abc.xyz", "a/b/X.jar". + // The server is expected to maintain the hierarchy of files as defined by their name. (i.e + // The relative path of the file on the server's filesystem will be the same as the name of + // the provided artifact) + string name = 1; + // A single data chunk. + ArtifactChunk data = 2; + } + + // A number of `SingleChunkArtifact` batched into a single RPC. + message Batch { + repeated SingleChunkArtifact artifacts = 1; + } + + // Signals the beginning/start of a chunked artifact. + // A large artifact is transferred through a payload of `BeginChunkedArtifact` followed by a + // sequence of `ArtifactChunk`s. + message BeginChunkedArtifact { + // Name of the artifact undergoing chunking. Follows the same conventions as the `name` in + // the `Artifact` message. + string name = 1; + // Total size of the artifact in bytes. + int64 total_bytes = 2; + // Number of chunks the artifact is split into. + // This includes the `initial_chunk`. + int64 num_chunks = 3; + // The first/initial chunk. + ArtifactChunk initial_chunk = 4; + } + + // The payload is either a batch of artifacts or a partial chunk of a large artifact. 
+ oneof payload { + Batch batch = 3; + // The metadata and the initial chunk of a large artifact chunked into multiple requests. + // The server side is notified about the total size of the large artifact as well as the + // number of chunks to expect. + BeginChunkedArtifact begin_chunk = 4; + // A chunk of an artifact excluding metadata. This can be any chunk of a large artifact + // excluding the first chunk (which is included in `BeginChunkedArtifact`). + ArtifactChunk chunk = 5; + } +} + +// Response to adding an artifact. Contains relevant metadata to verify successful transfer of +// artifact(s). +// Next ID: 4 +message AddArtifactsResponse { + // Session id in which the AddArtifact was running. + string session_id = 2; + // Server-side generated idempotency key that the client can use to assert that the server side + // session has not changed. + string server_side_session_id = 3; + + // The list of artifact(s) seen by the server. + repeated ArtifactSummary artifacts = 1; + + // Metadata of an artifact. + message ArtifactSummary { + string name = 1; + // Whether the CRC (Cyclic Redundancy Check) is successful on server verification. + // The server discards any artifact that fails the CRC. + // If false, the client may choose to resend the artifact specified by `name`. + bool is_crc_successful = 2; + } +} + +// Request to get current statuses of artifacts at the server side. +message ArtifactStatusesRequest { + // (Required) + // + // The session_id specifies a spark session for a user id (which is specified + // by user_context.user_id). The session_id is set by the client to be able to + // collate streaming responses from different queries within the dedicated session. + // The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + string session_id = 1; + + // (Optional) + // + // Server-side generated idempotency key from the previous responses (if any). Server + // can use this to validate that the server side session has not changed. + optional string client_observed_server_side_session_id = 5; + + // User context + UserContext user_context = 2; + + // Provides optional information about the client sending the request. This field + // can be used for language or version specific information and is only intended for + // logging purposes and will not be interpreted by the server. + optional string client_type = 3; + + // The name of the artifact is expected in the form of a "Relative Path" that is made up of a + // sequence of directories and the final file element. + // Examples of "Relative Path"s: "jars/test.jar", "classes/xyz.class", "abc.xyz", "a/b/X.jar". + // The server is expected to maintain the hierarchy of files as defined by their name. (i.e + // The relative path of the file on the server's filesystem will be the same as the name of + // the provided artifact) + repeated string names = 4; +} + +// Response to checking artifact statuses. +// Next ID: 4 +message ArtifactStatusesResponse { + // Session id in which the ArtifactStatus was running. + string session_id = 2; + // Server-side generated idempotency key that the client can use to assert that the server side + // session has not changed. + string server_side_session_id = 3; + // A map of artifact names to their statuses. + map statuses = 1; + + message ArtifactStatus { + // Exists or not particular artifact at the server. 
+ bool exists = 1; + } +} + +message InterruptRequest { + // (Required) + // + // The session_id specifies a spark session for a user id (which is specified + // by user_context.user_id). The session_id is set by the client to be able to + // collate streaming responses from different queries within the dedicated session. + // The id should be an UUID string of the format `00112233-4455-6677-8899-aabbccddeeff` + string session_id = 1; + + // (Optional) + // + // Server-side generated idempotency key from the previous responses (if any). Server + // can use this to validate that the server side session has not changed. + optional string client_observed_server_side_session_id = 7; + + // (Required) User context + UserContext user_context = 2; + + // Provides optional information about the client sending the request. This field + // can be used for language or version specific information and is only intended for + // logging purposes and will not be interpreted by the server. + optional string client_type = 3; + + // (Required) The type of interrupt to execute. + InterruptType interrupt_type = 4; + + enum InterruptType { + INTERRUPT_TYPE_UNSPECIFIED = 0; + + // Interrupt all running executions within the session with the provided session_id. + INTERRUPT_TYPE_ALL = 1; + + // Interrupt all running executions within the session with the provided operation_tag. + INTERRUPT_TYPE_TAG = 2; + + // Interrupt the running execution within the session with the provided operation_id. + INTERRUPT_TYPE_OPERATION_ID = 3; + } + + oneof interrupt { + // if interrupt_tag == INTERRUPT_TYPE_TAG, interrupt operation with this tag. + string operation_tag = 5; + + // if interrupt_tag == INTERRUPT_TYPE_OPERATION_ID, interrupt operation with this operation_id. + string operation_id = 6; + } +} + +// Next ID: 4 +message InterruptResponse { + // Session id in which the interrupt was running. + string session_id = 1; + // Server-side generated idempotency key that the client can use to assert that the server side + // session has not changed. + string server_side_session_id = 3; + + // Operation ids of the executions which were interrupted. + repeated string interrupted_ids = 2; + +} + +message ReattachOptions { + // If true, the request can be reattached to using ReattachExecute. + // ReattachExecute can be used either if the stream broke with a GRPC network error, + // or if the server closed the stream without sending a response with StreamStatus.complete=true. + // The server will keep a buffer of responses in case a response is lost, and + // ReattachExecute needs to back-track. + // + // If false, the execution response stream will will not be reattachable, and all responses are + // immediately released by the server after being sent. + bool reattachable = 1; +} + +message ReattachExecuteRequest { + // (Required) + // + // The session_id of the request to reattach to. + // This must be an id of existing session. + string session_id = 1; + + // (Optional) + // + // Server-side generated idempotency key from the previous responses (if any). Server + // can use this to validate that the server side session has not changed. + optional string client_observed_server_side_session_id = 6; + + // (Required) User context + // + // user_context.user_id and session+id both identify a unique remote spark session on the + // server side. + UserContext user_context = 2; + + // (Required) + // Provide an id of the request to reattach to. + // This must be an id of existing operation. 
+ string operation_id = 3; + + // Provides optional information about the client sending the request. This field + // can be used for language or version specific information and is only intended for + // logging purposes and will not be interpreted by the server. + optional string client_type = 4; + + // (Optional) + // Last already processed response id from the response stream. + // After reattach, server will resume the response stream after that response. + // If not specified, server will restart the stream from the start. + // + // Note: server controls the amount of responses that it buffers and it may drop responses, + // that are far behind the latest returned response, so this can't be used to arbitrarily + // scroll back the cursor. If the response is no longer available, this will result in an error. + optional string last_response_id = 5; +} + +message ReleaseExecuteRequest { + // (Required) + // + // The session_id of the request to reattach to. + // This must be an id of existing session. + string session_id = 1; + + // (Optional) + // + // Server-side generated idempotency key from the previous responses (if any). Server + // can use this to validate that the server side session has not changed. + optional string client_observed_server_side_session_id = 7; + + // (Required) User context + // + // user_context.user_id and session+id both identify a unique remote spark session on the + // server side. + UserContext user_context = 2; + + // (Required) + // Provide an id of the request to reattach to. + // This must be an id of existing operation. + string operation_id = 3; + + // Provides optional information about the client sending the request. This field + // can be used for language or version specific information and is only intended for + // logging purposes and will not be interpreted by the server. + optional string client_type = 4; + + // Release and close operation completely. + // This will also interrupt the query if it is running execution, and wait for it to be torn down. + message ReleaseAll {} + + // Release all responses from the operation response stream up to and including + // the response with the given by response_id. + // While server determines by itself how much of a buffer of responses to keep, client providing + // explicit release calls will help reduce resource consumption. + // Noop if response_id not found in cached responses. + message ReleaseUntil { + string response_id = 1; + } + + oneof release { + ReleaseAll release_all = 5; + ReleaseUntil release_until = 6; + } +} + +// Next ID: 4 +message ReleaseExecuteResponse { + // Session id in which the release was running. + string session_id = 1; + // Server-side generated idempotency key that the client can use to assert that the server side + // session has not changed. + string server_side_session_id = 3; + + // Operation id of the operation on which the release executed. + // If the operation couldn't be found (because e.g. it was concurrently released), will be unset. + // Otherwise, it will be equal to the operation_id from request. + optional string operation_id = 2; +} + +message ReleaseSessionRequest { + // (Required) + // + // The session_id of the request to reattach to. + // This must be an id of existing session. + string session_id = 1; + + // (Required) User context + // + // user_context.user_id and session+id both identify a unique remote spark session on the + // server side. + UserContext user_context = 2; + + // Provides optional information about the client sending the request. 
This field + // can be used for language or version specific information and is only intended for + // logging purposes and will not be interpreted by the server. + optional string client_type = 3; +} + +// Next ID: 3 +message ReleaseSessionResponse { + // Session id of the session on which the release executed. + string session_id = 1; + // Server-side generated idempotency key that the client can use to assert that the server side + // session has not changed. + string server_side_session_id = 2; +} + +message FetchErrorDetailsRequest { + + // (Required) + // The session_id specifies a Spark session for a user identified by user_context.user_id. + // The id should be a UUID string of the format `00112233-4455-6677-8899-aabbccddeeff`. + string session_id = 1; + + // (Optional) + // + // Server-side generated idempotency key from the previous responses (if any). Server + // can use this to validate that the server side session has not changed. + optional string client_observed_server_side_session_id = 5; + + // User context + UserContext user_context = 2; + + // (Required) + // The id of the error. + string error_id = 3; + + // Provides optional information about the client sending the request. This field + // can be used for language or version specific information and is only intended for + // logging purposes and will not be interpreted by the server. + optional string client_type = 4; +} + +// Next ID: 5 +message FetchErrorDetailsResponse { + + // Server-side generated idempotency key that the client can use to assert that the server side + // session has not changed. + string server_side_session_id = 3; + + string session_id = 4; + + // The index of the root error in errors. The field will not be set if the error is not found. + optional int32 root_error_idx = 1; + + // A list of errors. + repeated Error errors = 2; + + message StackTraceElement { + // The fully qualified name of the class containing the execution point. + string declaring_class = 1; + + // The name of the method containing the execution point. + string method_name = 2; + + // The name of the file containing the execution point. + optional string file_name = 3; + + // The line number of the source line containing the execution point. + int32 line_number = 4; + } + + // QueryContext defines the schema for the query context of a SparkThrowable. + // It helps users understand where the error occurs while executing queries. + message QueryContext { + // The type of this query context. + enum ContextType { + SQL = 0; + DATAFRAME = 1; + } + ContextType context_type = 10; + + // The object type of the query which throws the exception. + // If the exception is directly from the main query, it should be an empty string. + // Otherwise, it should be the exact object type in upper case. For example, a "VIEW". + string object_type = 1; + + // The object name of the query which throws the exception. + // If the exception is directly from the main query, it should be an empty string. + // Otherwise, it should be the object name. For example, a view name "V1". + string object_name = 2; + + // The starting index in the query text which throws the exception. The index starts from 0. + int32 start_index = 3; + + // The stopping index in the query which throws the exception. The index starts from 0. + int32 stop_index = 4; + + // The corresponding fragment of the query which throws the exception. + string fragment = 5; + + // The user code (call site of the API) that caused throwing the exception. 
+ string call_site = 6; + + // Summary of the exception cause. + string summary = 7; + } + + // SparkThrowable defines the schema for SparkThrowable exceptions. + message SparkThrowable { + // Succinct, human-readable, unique, and consistent representation of the error category. + optional string error_class = 1; + + // The message parameters for the error framework. + map message_parameters = 2; + + // The query context of a SparkThrowable. + repeated QueryContext query_contexts = 3; + + // Portable error identifier across SQL engines + // If null, error class or SQLSTATE is not set. + optional string sql_state = 4; + } + + // Error defines the schema for the representing exception. + message Error { + // The fully qualified names of the exception class and its parent classes. + repeated string error_type_hierarchy = 1; + + // The detailed message of the exception. + string message = 2; + + // The stackTrace of the exception. It will be set + // if the SQLConf spark.sql.connect.serverStacktrace.enabled is true. + repeated StackTraceElement stack_trace = 3; + + // The index of the cause error in errors. + optional int32 cause_idx = 4; + + // The structured data of a SparkThrowable exception. + optional SparkThrowable spark_throwable = 5; + } +} + +message CheckpointCommandResult { + // (Required) The logical plan checkpointed. + CachedRemoteRelation relation = 1; +} + +// Main interface for the SparkConnect service. +service SparkConnectService { + + // Executes a request that contains the query and returns a stream of [[Response]]. + // + // It is guaranteed that there is at least one ARROW batch returned even if the result set is empty. + rpc ExecutePlan(ExecutePlanRequest) returns (stream ExecutePlanResponse) {} + + // Analyzes a query and returns a [[AnalyzeResponse]] containing metadata about the query. + rpc AnalyzePlan(AnalyzePlanRequest) returns (AnalyzePlanResponse) {} + + // Update or fetch the configurations and returns a [[ConfigResponse]] containing the result. + rpc Config(ConfigRequest) returns (ConfigResponse) {} + + // Add artifacts to the session and returns a [[AddArtifactsResponse]] containing metadata about + // the added artifacts. + rpc AddArtifacts(stream AddArtifactsRequest) returns (AddArtifactsResponse) {} + + // Check statuses of artifacts in the session and returns them in a [[ArtifactStatusesResponse]] + rpc ArtifactStatus(ArtifactStatusesRequest) returns (ArtifactStatusesResponse) {} + + // Interrupts running executions + rpc Interrupt(InterruptRequest) returns (InterruptResponse) {} + + // Reattach to an existing reattachable execution. + // The ExecutePlan must have been started with ReattachOptions.reattachable=true. + // If the ExecutePlanResponse stream ends without a ResultComplete message, there is more to + // continue. If there is a ResultComplete, the client should use ReleaseExecute with + rpc ReattachExecute(ReattachExecuteRequest) returns (stream ExecutePlanResponse) {} + + // Release an reattachable execution, or parts thereof. + // The ExecutePlan must have been started with ReattachOptions.reattachable=true. + // Non reattachable executions are released automatically and immediately after the ExecutePlan + // RPC and ReleaseExecute may not be used. + rpc ReleaseExecute(ReleaseExecuteRequest) returns (ReleaseExecuteResponse) {} + + // Release a session. + // All the executions in the session will be released. Any further requests for the session with + // that session_id for the given user_id will fail. 
If the session didn't exist or was already + // released, this is a noop. + rpc ReleaseSession(ReleaseSessionRequest) returns (ReleaseSessionResponse) {} + + // FetchErrorDetails retrieves the matched exception with details based on a provided error id. + rpc FetchErrorDetails(FetchErrorDetailsRequest) returns (FetchErrorDetailsResponse) {} +} diff --git a/src/scripts/spark-connect-generation/proto/spark/connect/catalog.proto b/src/scripts/spark-connect-generation/proto/spark/connect/catalog.proto new file mode 100644 index 0000000000..5b1b90b008 --- /dev/null +++ b/src/scripts/spark-connect-generation/proto/spark/connect/catalog.proto @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = 'proto3'; + +package spark.connect; + +import "spark/connect/common.proto"; +import "spark/connect/types.proto"; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.proto"; +option go_package = "internal/generated"; + +// Catalog messages are marked as unstable. 
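For orientation, the SparkConnectService RPCs defined above are the full surface a connecting client drives; the sketch below (not part of these generated protos) shows roughly how the PySpark client exercises them. The endpoint `sc://localhost:15002` and the availability of `pyspark` with the "connect" extra are assumptions.

```python
# Rough sketch of a client exercising the SparkConnectService RPCs above.
# Assumption: a Spark Connect server is reachable at localhost:15002 and
# pyspark is installed with the "connect" extra.
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

# DataFrame actions are sent as ExecutePlanRequest messages; schema and
# explain queries go through AnalyzePlan, and spark.conf through Config.
spark.range(10).filter("id % 2 = 0").show()

# Stopping the session is expected to release server-side state
# (ReleaseSession) for this session_id.
spark.stop()
```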
+message Catalog { + oneof cat_type { + CurrentDatabase current_database = 1; + SetCurrentDatabase set_current_database = 2; + ListDatabases list_databases = 3; + ListTables list_tables = 4; + ListFunctions list_functions = 5; + ListColumns list_columns = 6; + GetDatabase get_database = 7; + GetTable get_table = 8; + GetFunction get_function = 9; + DatabaseExists database_exists = 10; + TableExists table_exists = 11; + FunctionExists function_exists = 12; + CreateExternalTable create_external_table = 13; + CreateTable create_table = 14; + DropTempView drop_temp_view = 15; + DropGlobalTempView drop_global_temp_view = 16; + RecoverPartitions recover_partitions = 17; + IsCached is_cached = 18; + CacheTable cache_table = 19; + UncacheTable uncache_table = 20; + ClearCache clear_cache = 21; + RefreshTable refresh_table = 22; + RefreshByPath refresh_by_path = 23; + CurrentCatalog current_catalog = 24; + SetCurrentCatalog set_current_catalog = 25; + ListCatalogs list_catalogs = 26; + } +} + +// See `spark.catalog.currentDatabase` +message CurrentDatabase { } + +// See `spark.catalog.setCurrentDatabase` +message SetCurrentDatabase { + // (Required) + string db_name = 1; +} + +// See `spark.catalog.listDatabases` +message ListDatabases { + // (Optional) The pattern that the database name needs to match + optional string pattern = 1; +} + +// See `spark.catalog.listTables` +message ListTables { + // (Optional) + optional string db_name = 1; + // (Optional) The pattern that the table name needs to match + optional string pattern = 2; +} + +// See `spark.catalog.listFunctions` +message ListFunctions { + // (Optional) + optional string db_name = 1; + // (Optional) The pattern that the function name needs to match + optional string pattern = 2; +} + +// See `spark.catalog.listColumns` +message ListColumns { + // (Required) + string table_name = 1; + // (Optional) + optional string db_name = 2; +} + +// See `spark.catalog.getDatabase` +message GetDatabase { + // (Required) + string db_name = 1; +} + +// See `spark.catalog.getTable` +message GetTable { + // (Required) + string table_name = 1; + // (Optional) + optional string db_name = 2; +} + +// See `spark.catalog.getFunction` +message GetFunction { + // (Required) + string function_name = 1; + // (Optional) + optional string db_name = 2; +} + +// See `spark.catalog.databaseExists` +message DatabaseExists { + // (Required) + string db_name = 1; +} + +// See `spark.catalog.tableExists` +message TableExists { + // (Required) + string table_name = 1; + // (Optional) + optional string db_name = 2; +} + +// See `spark.catalog.functionExists` +message FunctionExists { + // (Required) + string function_name = 1; + // (Optional) + optional string db_name = 2; +} + +// See `spark.catalog.createExternalTable` +message CreateExternalTable { + // (Required) + string table_name = 1; + // (Optional) + optional string path = 2; + // (Optional) + optional string source = 3; + // (Optional) + optional DataType schema = 4; + // Options could be empty for valid data source format. + // The map key is case insensitive. + map options = 5; +} + +// See `spark.catalog.createTable` +message CreateTable { + // (Required) + string table_name = 1; + // (Optional) + optional string path = 2; + // (Optional) + optional string source = 3; + // (Optional) + optional string description = 4; + // (Optional) + optional DataType schema = 5; + // Options could be empty for valid data source format. + // The map key is case insensitive. 
+ map options = 6; +} + +// See `spark.catalog.dropTempView` +message DropTempView { + // (Required) + string view_name = 1; +} + +// See `spark.catalog.dropGlobalTempView` +message DropGlobalTempView { + // (Required) + string view_name = 1; +} + +// See `spark.catalog.recoverPartitions` +message RecoverPartitions { + // (Required) + string table_name = 1; +} + +// See `spark.catalog.isCached` +message IsCached { + // (Required) + string table_name = 1; +} + +// See `spark.catalog.cacheTable` +message CacheTable { + // (Required) + string table_name = 1; + + // (Optional) + optional StorageLevel storage_level = 2; +} + +// See `spark.catalog.uncacheTable` +message UncacheTable { + // (Required) + string table_name = 1; +} + +// See `spark.catalog.clearCache` +message ClearCache { } + +// See `spark.catalog.refreshTable` +message RefreshTable { + // (Required) + string table_name = 1; +} + +// See `spark.catalog.refreshByPath` +message RefreshByPath { + // (Required) + string path = 1; +} + +// See `spark.catalog.currentCatalog` +message CurrentCatalog { } + +// See `spark.catalog.setCurrentCatalog` +message SetCurrentCatalog { + // (Required) + string catalog_name = 1; +} + +// See `spark.catalog.listCatalogs` +message ListCatalogs { + // (Optional) The pattern that the catalog name needs to match + optional string pattern = 1; +} diff --git a/src/scripts/spark-connect-generation/proto/spark/connect/commands.proto b/src/scripts/spark-connect-generation/proto/spark/connect/commands.proto new file mode 100644 index 0000000000..71189a3c43 --- /dev/null +++ b/src/scripts/spark-connect-generation/proto/spark/connect/commands.proto @@ -0,0 +1,533 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = 'proto3'; + +import "google/protobuf/any.proto"; +import "spark/connect/common.proto"; +import "spark/connect/expressions.proto"; +import "spark/connect/relations.proto"; + +package spark.connect; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.proto"; +option go_package = "internal/generated"; + +// A [[Command]] is an operation that is executed by the server that does not directly consume or +// produce a relational result. 
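As a point of reference (not part of this file), each catalog message above maps onto one `spark.catalog` call in the PySpark API; the sketch below assumes an existing Spark Connect session `spark` and a table named `people`.

```python
# Each call corresponds to one Catalog message from catalog.proto.
spark.catalog.setCurrentDatabase("default")        # SetCurrentDatabase
print(spark.catalog.currentDatabase())             # CurrentDatabase
for db in spark.catalog.listDatabases():           # ListDatabases
    print(db.name)
print(spark.catalog.tableExists("people"))         # TableExists
spark.catalog.cacheTable("people")                 # CacheTable (optional StorageLevel)
print(spark.catalog.isCached("people"))            # IsCached
spark.catalog.clearCache()                         # ClearCache
```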
+message Command { + oneof command_type { + CommonInlineUserDefinedFunction register_function = 1; + WriteOperation write_operation = 2; + CreateDataFrameViewCommand create_dataframe_view = 3; + WriteOperationV2 write_operation_v2 = 4; + SqlCommand sql_command = 5; + WriteStreamOperationStart write_stream_operation_start = 6; + StreamingQueryCommand streaming_query_command = 7; + GetResourcesCommand get_resources_command = 8; + StreamingQueryManagerCommand streaming_query_manager_command = 9; + CommonInlineUserDefinedTableFunction register_table_function = 10; + StreamingQueryListenerBusCommand streaming_query_listener_bus_command = 11; + CommonInlineUserDefinedDataSource register_data_source = 12; + CreateResourceProfileCommand create_resource_profile_command = 13; + CheckpointCommand checkpoint_command = 14; + RemoveCachedRemoteRelationCommand remove_cached_remote_relation_command = 15; + MergeIntoTableCommand merge_into_table_command = 16; + + // This field is used to mark extensions to the protocol. When plugins generate arbitrary + // Commands they can add them here. During the planning the correct resolution is done. + google.protobuf.Any extension = 999; + + } +} + +// A SQL Command is used to trigger the eager evaluation of SQL commands in Spark. +// +// When the SQL provide as part of the message is a command it will be immediately evaluated +// and the result will be collected and returned as part of a LocalRelation. If the result is +// not a command, the operation will simply return a SQL Relation. This allows the client to be +// almost oblivious to the server-side behavior. +message SqlCommand { + // (Required) SQL Query. + string sql = 1 [deprecated=true]; + + // (Optional) A map of parameter names to literal expressions. + map args = 2 [deprecated=true]; + + // (Optional) A sequence of literal expressions for positional parameters in the SQL query text. + repeated Expression.Literal pos_args = 3 [deprecated=true]; + + // (Optional) A map of parameter names to expressions. + // It cannot coexist with `pos_arguments`. + map named_arguments = 4 [deprecated=true]; + + // (Optional) A sequence of expressions for positional parameters in the SQL query text. + // It cannot coexist with `named_arguments`. + repeated Expression pos_arguments = 5 [deprecated=true]; + + // (Optional) The relation that this SQL command will be built on. + Relation input = 6; +} + +// A command that can create DataFrame global temp view or local temp view. +message CreateDataFrameViewCommand { + // (Required) The relation that this view will be built on. + Relation input = 1; + + // (Required) View name. + string name = 2; + + // (Required) Whether this is global temp view or local temp view. + bool is_global = 3; + + // (Required) + // + // If true, and if the view already exists, updates it; if false, and if the view + // already exists, throws exception. + bool replace = 4; +} + +// As writes are not directly handled during analysis and planning, they are modeled as commands. +message WriteOperation { + // (Required) The output of the `input` relation will be persisted according to the options. + Relation input = 1; + + // (Optional) Format value according to the Spark documentation. Examples are: text, parquet, delta. + optional string source = 2; + + // (Optional) + // + // The destination of the write operation can be either a path or a table. + // If the destination is neither a path nor a table, such as jdbc and noop, + // the `save_type` should not be set. 
+ oneof save_type { + string path = 3; + SaveTable table = 4; + } + + // (Required) the save mode. + SaveMode mode = 5; + + // (Optional) List of columns to sort the output by. + repeated string sort_column_names = 6; + + // (Optional) List of columns for partitioning. + repeated string partitioning_columns = 7; + + // (Optional) Bucketing specification. Bucketing must set the number of buckets and the columns + // to bucket by. + BucketBy bucket_by = 8; + + // (Optional) A list of configuration options. + map options = 9; + + // (Optional) Columns used for clustering the table. + repeated string clustering_columns = 10; + + message SaveTable { + // (Required) The table name. + string table_name = 1; + // (Required) The method to be called to write to the table. + TableSaveMethod save_method = 2; + + enum TableSaveMethod { + TABLE_SAVE_METHOD_UNSPECIFIED = 0; + TABLE_SAVE_METHOD_SAVE_AS_TABLE = 1; + TABLE_SAVE_METHOD_INSERT_INTO = 2; + } + } + + message BucketBy { + repeated string bucket_column_names = 1; + int32 num_buckets = 2; + } + + enum SaveMode { + SAVE_MODE_UNSPECIFIED = 0; + SAVE_MODE_APPEND = 1; + SAVE_MODE_OVERWRITE = 2; + SAVE_MODE_ERROR_IF_EXISTS = 3; + SAVE_MODE_IGNORE = 4; + } +} + +// As writes are not directly handled during analysis and planning, they are modeled as commands. +message WriteOperationV2 { + // (Required) The output of the `input` relation will be persisted according to the options. + Relation input = 1; + + // (Required) The destination of the write operation must be either a path or a table. + string table_name = 2; + + // (Optional) A provider for the underlying output data source. Spark's default catalog supports + // "parquet", "json", etc. + optional string provider = 3; + + // (Optional) List of columns for partitioning for output table created by `create`, + // `createOrReplace`, or `replace` + repeated Expression partitioning_columns = 4; + + // (Optional) A list of configuration options. + map options = 5; + + // (Optional) A list of table properties. + map table_properties = 6; + + // (Required) Write mode. + Mode mode = 7; + + enum Mode { + MODE_UNSPECIFIED = 0; + MODE_CREATE = 1; + MODE_OVERWRITE = 2; + MODE_OVERWRITE_PARTITIONS = 3; + MODE_APPEND = 4; + MODE_REPLACE = 5; + MODE_CREATE_OR_REPLACE = 6; + } + + // (Optional) A condition for overwrite saving mode + Expression overwrite_condition = 8; + + // (Optional) Columns used for clustering the table. + repeated string clustering_columns = 9; +} + +// Starts write stream operation as streaming query. Query ID and Run ID of the streaming +// query are returned. +message WriteStreamOperationStart { + + // (Required) The output of the `input` streaming relation will be written. + Relation input = 1; + + // The following fields directly map to API for DataStreamWriter(). + // Consult API documentation unless explicitly documented here. + + string format = 2; + map options = 3; + repeated string partitioning_column_names = 4; + + oneof trigger { + string processing_time_interval = 5; + bool available_now = 6; + bool once = 7; + string continuous_checkpoint_interval = 8; + } + + string output_mode = 9; + string query_name = 10; + + // The destination is optional. When set, it can be a path or a table name. + oneof sink_destination { + string path = 11; + string table_name = 12; + } + + StreamingForeachFunction foreach_writer = 13; + StreamingForeachFunction foreach_batch = 14; + + // (Optional) Columns used for clustering the table. 
+ repeated string clustering_column_names = 15; +} + +message StreamingForeachFunction { + oneof function { + PythonUDF python_function = 1; + ScalarScalaUDF scala_function = 2; + } +} + +message WriteStreamOperationStartResult { + + // (Required) Query instance. See `StreamingQueryInstanceId`. + StreamingQueryInstanceId query_id = 1; + + // An optional query name. + string name = 2; + + // Optional query started event if there is any listener registered on the client side. + optional string query_started_event_json = 3; + + // TODO: How do we indicate errors? + // TODO: Consider adding status, last progress etc here. +} + +// A tuple that uniquely identifies an instance of streaming query run. It consists of `id` that +// persists across the streaming runs and `run_id` that changes between each run of the +// streaming query that resumes from the checkpoint. +message StreamingQueryInstanceId { + + // (Required) The unique id of this query that persists across restarts from checkpoint data. + // That is, this id is generated when a query is started for the first time, and + // will be the same every time it is restarted from checkpoint data. + string id = 1; + + // (Required) The unique id of this run of the query. That is, every start/restart of a query + // will generate a unique run_id. Therefore, every time a query is restarted from + // checkpoint, it will have the same `id` but different `run_id`s. + string run_id = 2; +} + +// Commands for a streaming query. +message StreamingQueryCommand { + + // (Required) Query instance. See `StreamingQueryInstanceId`. + StreamingQueryInstanceId query_id = 1; + + // See documentation for the corresponding API method in StreamingQuery. + oneof command { + // status() API. + bool status = 2; + // lastProgress() API. + bool last_progress = 3; + // recentProgress() API. + bool recent_progress = 4; + // stop() API. Stops the query. + bool stop = 5; + // processAllAvailable() API. Waits till all the available data is processed + bool process_all_available = 6; + // explain() API. Returns logical and physical plans. + ExplainCommand explain = 7; + // exception() API. Returns the exception in the query if any. + bool exception = 8; + // awaitTermination() API. Waits for the termination of the query. + AwaitTerminationCommand await_termination = 9; + } + + message ExplainCommand { + // TODO: Consider reusing Explain from AnalyzePlanRequest message. + // We can not do this right now since it base.proto imports this file. + bool extended = 1; + } + + message AwaitTerminationCommand { + optional int64 timeout_ms = 2; + } +} + +// Response for commands on a streaming query. +message StreamingQueryCommandResult { + // (Required) Query instance id. See `StreamingQueryInstanceId`. + StreamingQueryInstanceId query_id = 1; + + oneof result_type { + StatusResult status = 2; + RecentProgressResult recent_progress = 3; + ExplainResult explain = 4; + ExceptionResult exception = 5; + AwaitTerminationResult await_termination = 6; + } + + message StatusResult { + // See documentation for these Scala 'StreamingQueryStatus' struct + string status_message = 1; + bool is_data_available = 2; + bool is_trigger_active = 3; + bool is_active = 4; + } + + message RecentProgressResult { + // Progress reports as an array of json strings. 
+ repeated string recent_progress_json = 5; + } + + message ExplainResult { + // Logical and physical plans as string + string result = 1; + } + + message ExceptionResult { + // (Optional) Exception message as string, maps to the return value of original + // StreamingQueryException's toString method + optional string exception_message = 1; + // (Optional) Exception error class as string + optional string error_class = 2; + // (Optional) Exception stack trace as string + optional string stack_trace = 3; + } + + message AwaitTerminationResult { + bool terminated = 1; + } +} + +// Commands for the streaming query manager. +message StreamingQueryManagerCommand { + + // See documentation for the corresponding API method in StreamingQueryManager. + oneof command { + // active() API, returns a list of active queries. + bool active = 1; + // get() API, returns the StreamingQuery identified by id. + string get_query = 2; + // awaitAnyTermination() API, wait until any query terminates or timeout. + AwaitAnyTerminationCommand await_any_termination = 3; + // resetTerminated() API. + bool reset_terminated = 4; + // addListener API. + StreamingQueryListenerCommand add_listener = 5; + // removeListener API. + StreamingQueryListenerCommand remove_listener = 6; + // listListeners() API, returns a list of streaming query listeners. + bool list_listeners = 7; + } + + message AwaitAnyTerminationCommand { + // (Optional) The waiting time in milliseconds to wait for any query to terminate. + optional int64 timeout_ms = 1; + } + + message StreamingQueryListenerCommand { + bytes listener_payload = 1; + optional PythonUDF python_listener_payload = 2; + string id = 3; + } +} + +// Response for commands on the streaming query manager. +message StreamingQueryManagerCommandResult { + oneof result_type { + ActiveResult active = 1; + StreamingQueryInstance query = 2; + AwaitAnyTerminationResult await_any_termination = 3; + bool reset_terminated = 4; + bool add_listener = 5; + bool remove_listener = 6; + ListStreamingQueryListenerResult list_listeners = 7; + } + + message ActiveResult { + repeated StreamingQueryInstance active_queries = 1; + } + + message StreamingQueryInstance { + // (Required) The id and runId of this query. + StreamingQueryInstanceId id = 1; + // (Optional) The name of this query. + optional string name = 2; + } + + message AwaitAnyTerminationResult { + bool terminated = 1; + } + + message StreamingQueryListenerInstance { + bytes listener_payload = 1; + } + + message ListStreamingQueryListenerResult { + // (Required) Reference IDs of listener instances. + repeated string listener_ids = 1; + } +} + +// The protocol for client-side StreamingQueryListener. +// This command will only be set when either the first listener is added to the client, or the last +// listener is removed from the client. +// The add_listener_bus_listener command will only be set true in the first case. +// The remove_listener_bus_listener command will only be set true in the second case. 
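For context, the streaming command and result messages above model the DataStreamWriter, StreamingQuery, and StreamingQueryManager client APIs; a hedged sketch follows. The rate source, console sink, and checkpoint path are illustrative assumptions and may not reflect how this repository exercises the protocol.

```python
# Hedged sketch of the client APIs behind WriteStreamOperationStart,
# StreamingQueryCommand and StreamingQueryManagerCommand.
stream_df = spark.readStream.format("rate").load()

query = (stream_df.writeStream                     # WriteStreamOperationStart
         .format("console")
         .option("checkpointLocation", "/tmp/chk")
         .queryName("demo")
         .start())

print(query.status)                                # StreamingQueryCommand.status
print(query.lastProgress)                          # StreamingQueryCommand.last_progress
print(spark.streams.active)                        # StreamingQueryManagerCommand.active
query.stop()                                       # StreamingQueryCommand.stop
```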
+message StreamingQueryListenerBusCommand { + oneof command { + bool add_listener_bus_listener = 1; + bool remove_listener_bus_listener = 2; + } +} + +// The enum used for client side streaming query listener event +// There is no QueryStartedEvent defined here, +// it is added as a field in WriteStreamOperationStartResult +enum StreamingQueryEventType { + QUERY_PROGRESS_UNSPECIFIED = 0; + QUERY_PROGRESS_EVENT = 1; + QUERY_TERMINATED_EVENT = 2; + QUERY_IDLE_EVENT = 3; +} + +// The protocol for the returned events in the long-running response channel. +message StreamingQueryListenerEvent { + // (Required) The json serialized event, all StreamingQueryListener events have a json method + string event_json = 1; + // (Required) Query event type used by client to decide how to deserialize the event_json + StreamingQueryEventType event_type = 2; +} + +message StreamingQueryListenerEventsResult { + repeated StreamingQueryListenerEvent events = 1; + optional bool listener_bus_listener_added = 2; +} + +// Command to get the output of 'SparkContext.resources' +message GetResourcesCommand { } + +// Response for command 'GetResourcesCommand'. +message GetResourcesCommandResult { + map resources = 1; +} + +// Command to create ResourceProfile +message CreateResourceProfileCommand { + // (Required) The ResourceProfile to be built on the server-side. + ResourceProfile profile = 1; +} + +// Response for command 'CreateResourceProfileCommand'. +message CreateResourceProfileCommandResult { + // (Required) Server-side generated resource profile id. + int32 profile_id = 1; +} + +// Command to remove `CashedRemoteRelation` +message RemoveCachedRemoteRelationCommand { + // (Required) The remote to be related + CachedRemoteRelation relation = 1; +} + +message CheckpointCommand { + // (Required) The logical plan to checkpoint. + Relation relation = 1; + + // (Required) Locally checkpoint using a local temporary + // directory in Spark Connect server (Spark Driver) + bool local = 2; + + // (Required) Whether to checkpoint this dataframe immediately. + bool eager = 3; +} + +message MergeIntoTableCommand { + // (Required) The name of the target table. + string target_table_name = 1; + + // (Required) The relation of the source table. + Relation source_table_plan = 2; + + // (Required) The condition to match the source and target. + Expression merge_condition = 3; + + // (Optional) The actions to be taken when the condition is matched. + repeated Expression match_actions = 4; + + // (Optional) The actions to be taken when the condition is not matched. + repeated Expression not_matched_actions = 5; + + // (Optional) The actions to be taken when the condition is not matched by source. + repeated Expression not_matched_by_source_actions = 6; + + // (Required) Whether to enable schema evolution. + bool with_schema_evolution = 7; +} diff --git a/src/scripts/spark-connect-generation/proto/spark/connect/common.proto b/src/scripts/spark-connect-generation/proto/spark/connect/common.proto new file mode 100644 index 0000000000..b2848370b0 --- /dev/null +++ b/src/scripts/spark-connect-generation/proto/spark/connect/common.proto @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = 'proto3'; + +package spark.connect; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.proto"; +option go_package = "internal/generated"; + +// StorageLevel for persisting Datasets/Tables. +message StorageLevel { + // (Required) Whether the cache should use disk or not. + bool use_disk = 1; + // (Required) Whether the cache should use memory or not. + bool use_memory = 2; + // (Required) Whether the cache should use off-heap or not. + bool use_off_heap = 3; + // (Required) Whether the cached data is deserialized or not. + bool deserialized = 4; + // (Required) The number of replicas. + int32 replication = 5; +} + + +// ResourceInformation to hold information about a type of Resource. +// The corresponding class is 'org.apache.spark.resource.ResourceInformation' +message ResourceInformation { + // (Required) The name of the resource + string name = 1; + // (Required) An array of strings describing the addresses of the resource. + repeated string addresses = 2; +} + +// An executor resource request. +message ExecutorResourceRequest { + // (Required) resource name. + string resource_name = 1; + + // (Required) resource amount requesting. + int64 amount = 2; + + // Optional script used to discover the resources. + optional string discovery_script = 3; + + // Optional vendor, required for some cluster managers. + optional string vendor = 4; +} + +// A task resource request. +message TaskResourceRequest { + // (Required) resource name. + string resource_name = 1; + + // (Required) resource amount requesting as a double to support fractional + // resource requests. + double amount = 2; +} + +message ResourceProfile { + // (Optional) Resource requests for executors. Mapped from the resource name + // (e.g., cores, memory, CPU) to its specific request. + map executor_resources = 1; + + // (Optional) Resource requests for tasks. Mapped from the resource name + // (e.g., cores, memory, CPU) to its specific request. + map task_resources = 2; +} + +message Origin { + // (Required) Indicate the origin type. + oneof function { + PythonOrigin python_origin = 1; + } +} + +message PythonOrigin { + // (Required) Name of the origin, for example, the name of the function + string fragment = 1; + + // (Required) Callsite to show to end users, for example, stacktrace. + string call_site = 2; +} diff --git a/src/scripts/spark-connect-generation/proto/spark/connect/example_plugins.proto b/src/scripts/spark-connect-generation/proto/spark/connect/example_plugins.proto new file mode 100644 index 0000000000..caf233e75e --- /dev/null +++ b/src/scripts/spark-connect-generation/proto/spark/connect/example_plugins.proto @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = 'proto3'; + +import "spark/connect/relations.proto"; +import "spark/connect/expressions.proto"; +option go_package = "internal/generated"; + +package spark.connect; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.proto"; + +message ExamplePluginRelation { + Relation input = 1; + string custom_field = 2; + +} + +message ExamplePluginExpression { + Expression child = 1; + string custom_field = 2; +} + +message ExamplePluginCommand { + string custom_field = 1; +} diff --git a/src/scripts/spark-connect-generation/proto/spark/connect/expressions.proto b/src/scripts/spark-connect-generation/proto/spark/connect/expressions.proto new file mode 100644 index 0000000000..3a91371fd3 --- /dev/null +++ b/src/scripts/spark-connect-generation/proto/spark/connect/expressions.proto @@ -0,0 +1,453 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = 'proto3'; + +import "google/protobuf/any.proto"; +import "spark/connect/types.proto"; +import "spark/connect/common.proto"; + +package spark.connect; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.proto"; +option go_package = "internal/generated"; + +// Expression used to refer to fields, functions and similar. This can be used everywhere +// expressions in SQL appear. +message Expression { + + ExpressionCommon common = 18; + oneof expr_type { + Literal literal = 1; + UnresolvedAttribute unresolved_attribute = 2; + UnresolvedFunction unresolved_function = 3; + ExpressionString expression_string = 4; + UnresolvedStar unresolved_star = 5; + Alias alias = 6; + Cast cast = 7; + UnresolvedRegex unresolved_regex = 8; + SortOrder sort_order = 9; + LambdaFunction lambda_function = 10; + Window window = 11; + UnresolvedExtractValue unresolved_extract_value = 12; + UpdateFields update_fields = 13; + UnresolvedNamedLambdaVariable unresolved_named_lambda_variable = 14; + CommonInlineUserDefinedFunction common_inline_user_defined_function = 15; + CallFunction call_function = 16; + NamedArgumentExpression named_argument_expression = 17; + MergeAction merge_action = 19; + TypedAggregateExpression typed_aggregate_expression = 20; + + // This field is used to mark extensions to the protocol. When plugins generate arbitrary + // relations they can add them here. During the planning the correct resolution is done. 
+ google.protobuf.Any extension = 999; + } + + + // Expression for the OVER clause or WINDOW clause. + message Window { + + // (Required) The window function. + Expression window_function = 1; + + // (Optional) The way that input rows are partitioned. + repeated Expression partition_spec = 2; + + // (Optional) Ordering of rows in a partition. + repeated SortOrder order_spec = 3; + + // (Optional) Window frame in a partition. + // + // If not set, it will be treated as 'UnspecifiedFrame'. + WindowFrame frame_spec = 4; + + // The window frame + message WindowFrame { + + // (Required) The type of the frame. + FrameType frame_type = 1; + + // (Required) The lower bound of the frame. + FrameBoundary lower = 2; + + // (Required) The upper bound of the frame. + FrameBoundary upper = 3; + + enum FrameType { + FRAME_TYPE_UNDEFINED = 0; + + // RowFrame treats rows in a partition individually. + FRAME_TYPE_ROW = 1; + + // RangeFrame treats rows in a partition as groups of peers. + // All rows having the same 'ORDER BY' ordering are considered as peers. + FRAME_TYPE_RANGE = 2; + } + + message FrameBoundary { + oneof boundary { + // CURRENT ROW boundary + bool current_row = 1; + + // UNBOUNDED boundary. + // For lower bound, it will be converted to 'UnboundedPreceding'. + // for upper bound, it will be converted to 'UnboundedFollowing'. + bool unbounded = 2; + + // This is an expression for future proofing. We are expecting literals on the server side. + Expression value = 3; + } + } + } + } + + // SortOrder is used to specify the data ordering, it is normally used in Sort and Window. + // It is an unevaluable expression and cannot be evaluated, so can not be used in Projection. + message SortOrder { + // (Required) The expression to be sorted. + Expression child = 1; + + // (Required) The sort direction, should be ASCENDING or DESCENDING. + SortDirection direction = 2; + + // (Required) How to deal with NULLs, should be NULLS_FIRST or NULLS_LAST. + NullOrdering null_ordering = 3; + + enum SortDirection { + SORT_DIRECTION_UNSPECIFIED = 0; + SORT_DIRECTION_ASCENDING = 1; + SORT_DIRECTION_DESCENDING = 2; + } + + enum NullOrdering { + SORT_NULLS_UNSPECIFIED = 0; + SORT_NULLS_FIRST = 1; + SORT_NULLS_LAST = 2; + } + } + + message Cast { + // (Required) the expression to be casted. + Expression expr = 1; + + // (Required) the data type that the expr to be casted to. + oneof cast_to_type { + DataType type = 2; + // If this is set, Server will use Catalyst parser to parse this string to DataType. + string type_str = 3; + } + + // (Optional) The expression evaluation mode. + EvalMode eval_mode = 4; + + enum EvalMode { + EVAL_MODE_UNSPECIFIED = 0; + EVAL_MODE_LEGACY = 1; + EVAL_MODE_ANSI = 2; + EVAL_MODE_TRY = 3; + } + } + + message Literal { + oneof literal_type { + DataType null = 1; + bytes binary = 2; + bool boolean = 3; + + int32 byte = 4; + int32 short = 5; + int32 integer = 6; + int64 long = 7; + float float = 10; + double double = 11; + Decimal decimal = 12; + + string string = 13; + + // Date in units of days since the UNIX epoch. + int32 date = 16; + // Timestamp in units of microseconds since the UNIX epoch. + int64 timestamp = 17; + // Timestamp in units of microseconds since the UNIX epoch (without timezone information). + int64 timestamp_ntz = 18; + + CalendarInterval calendar_interval = 19; + int32 year_month_interval = 20; + int64 day_time_interval = 21; + Array array = 22; + Map map = 23; + Struct struct = 24; + } + + message Decimal { + // the string representation. 
+ string value = 1; + // The maximum number of digits allowed in the value. + // the maximum precision is 38. + optional int32 precision = 2; + // declared scale of decimal literal + optional int32 scale = 3; + } + + message CalendarInterval { + int32 months = 1; + int32 days = 2; + int64 microseconds = 3; + } + + message Array { + DataType element_type = 1; + repeated Literal elements = 2; + } + + message Map { + DataType key_type = 1; + DataType value_type = 2; + repeated Literal keys = 3; + repeated Literal values = 4; + } + + message Struct { + DataType struct_type = 1; + repeated Literal elements = 2; + } + } + + // An unresolved attribute that is not explicitly bound to a specific column, but the column + // is resolved during analysis by name. + message UnresolvedAttribute { + // (Required) An identifier that will be parsed by Catalyst parser. This should follow the + // Spark SQL identifier syntax. + string unparsed_identifier = 1; + + // (Optional) The id of corresponding connect plan. + optional int64 plan_id = 2; + + // (Optional) The requested column is a metadata column. + optional bool is_metadata_column = 3; + } + + // An unresolved function is not explicitly bound to one explicit function, but the function + // is resolved during analysis following Sparks name resolution rules. + message UnresolvedFunction { + // (Required) name (or unparsed name for user defined function) for the unresolved function. + string function_name = 1; + + // (Optional) Function arguments. Empty arguments are allowed. + repeated Expression arguments = 2; + + // (Required) Indicate if this function should be applied on distinct values. + bool is_distinct = 3; + + // (Required) Indicate if this is a user defined function. + // + // When it is not a user defined function, Connect will use the function name directly. + // When it is a user defined function, Connect will parse the function name first. + bool is_user_defined_function = 4; + } + + // Expression as string. + message ExpressionString { + // (Required) A SQL expression that will be parsed by Catalyst parser. + string expression = 1; + } + + // UnresolvedStar is used to expand all the fields of a relation or struct. + message UnresolvedStar { + + // (Optional) The target of the expansion. + // + // If set, it should end with '.*' and will be parsed by 'parseAttributeName' + // in the server side. + optional string unparsed_target = 1; + + // (Optional) The id of corresponding connect plan. + optional int64 plan_id = 2; + } + + // Represents all of the input attributes to a given relational operator, for example in + // "SELECT `(id)?+.+` FROM ...". + message UnresolvedRegex { + // (Required) The column name used to extract column with regex. + string col_name = 1; + + // (Optional) The id of corresponding connect plan. + optional int64 plan_id = 2; + } + + // Extracts a value or values from an Expression + message UnresolvedExtractValue { + // (Required) The expression to extract value from, can be + // Map, Array, Struct or array of Structs. + Expression child = 1; + + // (Required) The expression to describe the extraction, can be + // key of Map, index of Array, field name of Struct. + Expression extraction = 2; + } + + // Add, replace or drop a field of `StructType` expression by name. + message UpdateFields { + // (Required) The struct expression. + Expression struct_expression = 1; + + // (Required) The field name. + string field_name = 2; + + // (Optional) The expression to add or replace. 
+ // + // When not set, it means this field will be dropped. + Expression value_expression = 3; + } + + message Alias { + // (Required) The expression that alias will be added on. + Expression expr = 1; + + // (Required) a list of name parts for the alias. + // + // Scalar columns only has one name that presents. + repeated string name = 2; + + // (Optional) Alias metadata expressed as a JSON map. + optional string metadata = 3; + } + + message LambdaFunction { + // (Required) The lambda function. + // + // The function body should use 'UnresolvedAttribute' as arguments, the sever side will + // replace 'UnresolvedAttribute' with 'UnresolvedNamedLambdaVariable'. + Expression function = 1; + + // (Required) Function variables. Must contains 1 ~ 3 variables. + repeated Expression.UnresolvedNamedLambdaVariable arguments = 2; + } + + message UnresolvedNamedLambdaVariable { + + // (Required) a list of name parts for the variable. Must not be empty. + repeated string name_parts = 1; + } +} + +message ExpressionCommon { + // (Required) Keep the information of the origin for this expression such as stacktrace. + Origin origin = 1; +} + +message CommonInlineUserDefinedFunction { + // (Required) Name of the user-defined function. + string function_name = 1; + // (Optional) Indicate if the user-defined function is deterministic. + bool deterministic = 2; + // (Optional) Function arguments. Empty arguments are allowed. + repeated Expression arguments = 3; + // (Required) Indicate the function type of the user-defined function. + oneof function { + PythonUDF python_udf = 4; + ScalarScalaUDF scalar_scala_udf = 5; + JavaUDF java_udf = 6; + } +} + +message PythonUDF { + // (Required) Output type of the Python UDF + DataType output_type = 1; + // (Required) EvalType of the Python UDF + int32 eval_type = 2; + // (Required) The encoded commands of the Python UDF + bytes command = 3; + // (Required) Python version being used in the client. + string python_ver = 4; + // (Optional) Additional includes for the Python UDF. + repeated string additional_includes = 5; +} + +message ScalarScalaUDF { + // (Required) Serialized JVM object containing UDF definition, input encoders and output encoder + bytes payload = 1; + // (Optional) Input type(s) of the UDF + repeated DataType inputTypes = 2; + // (Required) Output type of the UDF + DataType outputType = 3; + // (Required) True if the UDF can return null value + bool nullable = 4; + // (Required) Indicate if the UDF is an aggregate function + bool aggregate = 5; +} + +message JavaUDF { + // (Required) Fully qualified name of Java class + string class_name = 1; + + // (Optional) Output type of the Java UDF + optional DataType output_type = 2; + + // (Required) Indicate if the Java user-defined function is an aggregate function + bool aggregate = 3; +} + +message TypedAggregateExpression { + // (Required) The aggregate function object packed into bytes. + ScalarScalaUDF scalar_scala_udf = 1; +} + +message CallFunction { + // (Required) Unparsed name of the SQL function. + string function_name = 1; + + // (Optional) Function arguments. Empty arguments are allowed. + repeated Expression arguments = 2; +} + +message NamedArgumentExpression { + // (Required) The key of the named argument. + string key = 1; + + // (Required) The value expression of the named argument. + Expression value = 2; +} + +message MergeAction { + // (Required) The action type of the merge action. + ActionType action_type = 1; + + // (Optional) The condition expression of the merge action. 
+ optional Expression condition = 2; + + // (Optional) The assignments of the merge action. Required for ActionTypes INSERT and UPDATE. + repeated Assignment assignments = 3; + + enum ActionType { + ACTION_TYPE_INVALID = 0; + ACTION_TYPE_DELETE = 1; + ACTION_TYPE_INSERT = 2; + ACTION_TYPE_INSERT_STAR = 3; + ACTION_TYPE_UPDATE = 4; + ACTION_TYPE_UPDATE_STAR = 5; + } + + message Assignment { + // (Required) The key of the assignment. + Expression key = 1; + + // (Required) The value of the assignment. + Expression value = 2; + } +} diff --git a/src/scripts/spark-connect-generation/proto/spark/connect/relations.proto b/src/scripts/spark-connect-generation/proto/spark/connect/relations.proto new file mode 100644 index 0000000000..1003e5c21d --- /dev/null +++ b/src/scripts/spark-connect-generation/proto/spark/connect/relations.proto @@ -0,0 +1,1133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = 'proto3'; + +package spark.connect; + +import "google/protobuf/any.proto"; +import "spark/connect/expressions.proto"; +import "spark/connect/types.proto"; +import "spark/connect/catalog.proto"; +import "spark/connect/common.proto"; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.proto"; +option go_package = "internal/generated"; + +// The main [[Relation]] type. Fundamentally, a relation is a typed container +// that has exactly one explicit relation type set. +// +// When adding new relation types, they have to be registered here. 
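The Relation message that follows, together with the Expression messages above, is how a client encodes an unresolved query tree. The hedged sketch below (the table name `people` and its columns are assumptions) shows the kind of DataFrame chain that becomes a Read -> Filter -> Project -> Limit tree carrying Expression nodes.

```python
# Illustrative only: each chained DataFrame call adds one Relation node, and
# the column arguments travel as Expression nodes
# (UnresolvedAttribute, UnresolvedFunction, Literal, Alias, ...).
from pyspark.sql import functions as F

df = (spark.read.table("people")                       # Read.NamedTable
      .filter(F.col("age") > 21)                       # Filter
      .select(F.col("name"),
              (F.col("age") + 1).alias("next_age"))    # Project + Alias
      .limit(10))                                      # Limit
df.explain()   # resolved server-side via AnalyzePlan
```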
+message Relation { + RelationCommon common = 1; + oneof rel_type { + Read read = 2; + Project project = 3; + Filter filter = 4; + Join join = 5; + SetOperation set_op = 6; + Sort sort = 7; + Limit limit = 8; + Aggregate aggregate = 9; + SQL sql = 10; + LocalRelation local_relation = 11; + Sample sample = 12; + Offset offset = 13; + Deduplicate deduplicate = 14; + Range range = 15; + SubqueryAlias subquery_alias = 16; + Repartition repartition = 17; + ToDF to_df = 18; + WithColumnsRenamed with_columns_renamed = 19; + ShowString show_string = 20; + Drop drop = 21; + Tail tail = 22; + WithColumns with_columns = 23; + Hint hint = 24; + Unpivot unpivot = 25; + ToSchema to_schema = 26; + RepartitionByExpression repartition_by_expression = 27; + MapPartitions map_partitions = 28; + CollectMetrics collect_metrics = 29; + Parse parse = 30; + GroupMap group_map = 31; + CoGroupMap co_group_map = 32; + WithWatermark with_watermark = 33; + ApplyInPandasWithState apply_in_pandas_with_state = 34; + HtmlString html_string = 35; + CachedLocalRelation cached_local_relation = 36; + CachedRemoteRelation cached_remote_relation = 37; + CommonInlineUserDefinedTableFunction common_inline_user_defined_table_function = 38; + AsOfJoin as_of_join = 39; + CommonInlineUserDefinedDataSource common_inline_user_defined_data_source = 40; + WithRelations with_relations = 41; + Transpose transpose = 42; + + // NA functions + NAFill fill_na = 90; + NADrop drop_na = 91; + NAReplace replace = 92; + + // stat functions + StatSummary summary = 100; + StatCrosstab crosstab = 101; + StatDescribe describe = 102; + StatCov cov = 103; + StatCorr corr = 104; + StatApproxQuantile approx_quantile = 105; + StatFreqItems freq_items = 106; + StatSampleBy sample_by = 107; + + // Catalog API (experimental / unstable) + Catalog catalog = 200; + + // This field is used to mark extensions to the protocol. When plugins generate arbitrary + // relations they can add them here. During the planning the correct resolution is done. + google.protobuf.Any extension = 998; + Unknown unknown = 999; + } +} + +// Used for testing purposes only. +message Unknown {} + +// Common metadata of all relations. +message RelationCommon { + // (Required) Shared relation metadata. + string source_info = 1 [deprecated=true]; + + // (Optional) A per-client globally unique id for a given connect plan. + optional int64 plan_id = 2; + + // (Optional) Keep the information of the origin for this expression such as stacktrace. + Origin origin = 3; +} + +// Relation that uses a SQL query to generate the output. +message SQL { + // (Required) The SQL query. + string query = 1; + + // (Optional) A map of parameter names to literal expressions. + map args = 2 [deprecated=true]; + + // (Optional) A sequence of literal expressions for positional parameters in the SQL query text. + repeated Expression.Literal pos_args = 3 [deprecated=true]; + + // (Optional) A map of parameter names to expressions. + // It cannot coexist with `pos_arguments`. + map named_arguments = 4; + + // (Optional) A sequence of expressions for positional parameters in the SQL query text. + // It cannot coexist with `named_arguments`. + repeated Expression pos_arguments = 5; +} + +// Relation of type [[WithRelations]]. +// +// This relation contains a root plan, and one or more references that are used by the root plan. +// There are two ways of referencing a relation, by name (through a subquery alias), or by plan_id +// (using RelationCommon.plan_id). 
+// +// This relation can be used to implement CTEs, describe DAGs, or to reduce tree depth. +message WithRelations { + // (Required) Plan at the root of the query tree. This plan is expected to contain one or more + // references. Those references get expanded later on by the engine. + Relation root = 1; + + // (Required) Plans referenced by the root plan. Relations in this list are also allowed to + // contain references to other relations in this list, as long they do not form cycles. + repeated Relation references = 2; +} + +// Relation that reads from a file / table or other data source. Does not have additional +// inputs. +message Read { + oneof read_type { + NamedTable named_table = 1; + DataSource data_source = 2; + } + + // (Optional) Indicates if this is a streaming read. + bool is_streaming = 3; + + message NamedTable { + // (Required) Unparsed identifier for the table. + string unparsed_identifier = 1; + + // Options for the named table. The map key is case insensitive. + map options = 2; + } + + message DataSource { + // (Optional) Supported formats include: parquet, orc, text, json, parquet, csv, avro. + // + // If not set, the value from SQL conf 'spark.sql.sources.default' will be used. + optional string format = 1; + + // (Optional) If not set, Spark will infer the schema. + // + // This schema string should be either DDL-formatted or JSON-formatted. + optional string schema = 2; + + // Options for the data source. The context of this map varies based on the + // data source format. This options could be empty for valid data source format. + // The map key is case insensitive. + map options = 3; + + // (Optional) A list of path for file-system backed data sources. + repeated string paths = 4; + + // (Optional) Condition in the where clause for each partition. + // + // This is only supported by the JDBC data source. + repeated string predicates = 5; + } +} + +// Projection of a bag of expressions for a given input relation. +// +// The input relation must be specified. +// The projected expression can be an arbitrary expression. +message Project { + // (Optional) Input relation is optional for Project. + // + // For example, `SELECT ABS(-1)` is valid plan without an input plan. + Relation input = 1; + + // (Required) A Project requires at least one expression. + repeated Expression expressions = 3; +} + +// Relation that applies a boolean expression `condition` on each row of `input` to produce +// the output result. +message Filter { + // (Required) Input relation for a Filter. + Relation input = 1; + + // (Required) A Filter must have a condition expression. + Expression condition = 2; +} + +// Relation of type [[Join]]. +// +// `left` and `right` must be present. +message Join { + // (Required) Left input relation for a Join. + Relation left = 1; + + // (Required) Right input relation for a Join. + Relation right = 2; + + // (Optional) The join condition. Could be unset when `using_columns` is utilized. + // + // This field does not co-exist with using_columns. + Expression join_condition = 3; + + // (Required) The join type. + JoinType join_type = 4; + + // Optional. using_columns provides a list of columns that should present on both sides of + // the join inputs that this Join will join on. For example A JOIN B USING col_name is + // equivalent to A JOIN B on A.col_name = B.col_name. + // + // This field does not co-exist with join_condition. 
+ repeated string using_columns = 5; + + enum JoinType { + JOIN_TYPE_UNSPECIFIED = 0; + JOIN_TYPE_INNER = 1; + JOIN_TYPE_FULL_OUTER = 2; + JOIN_TYPE_LEFT_OUTER = 3; + JOIN_TYPE_RIGHT_OUTER = 4; + JOIN_TYPE_LEFT_ANTI = 5; + JOIN_TYPE_LEFT_SEMI = 6; + JOIN_TYPE_CROSS = 7; + } + + // (Optional) Only used by joinWith. Set the left and right join data types. + optional JoinDataType join_data_type = 6; + + message JoinDataType { + // If the left data type is a struct. + bool is_left_struct = 1; + // If the right data type is a struct. + bool is_right_struct = 2; + } +} + +// Relation of type [[SetOperation]] +message SetOperation { + // (Required) Left input relation for a Set operation. + Relation left_input = 1; + + // (Required) Right input relation for a Set operation. + Relation right_input = 2; + + // (Required) The Set operation type. + SetOpType set_op_type = 3; + + // (Optional) If to remove duplicate rows. + // + // True to preserve all results. + // False to remove duplicate rows. + optional bool is_all = 4; + + // (Optional) If to perform the Set operation based on name resolution. + // + // Only UNION supports this option. + optional bool by_name = 5; + + // (Optional) If to perform the Set operation and allow missing columns. + // + // Only UNION supports this option. + optional bool allow_missing_columns = 6; + + enum SetOpType { + SET_OP_TYPE_UNSPECIFIED = 0; + SET_OP_TYPE_INTERSECT = 1; + SET_OP_TYPE_UNION = 2; + SET_OP_TYPE_EXCEPT = 3; + } +} + +// Relation of type [[Limit]] that is used to `limit` rows from the input relation. +message Limit { + // (Required) Input relation for a Limit. + Relation input = 1; + + // (Required) the limit. + int32 limit = 2; +} + +// Relation of type [[Offset]] that is used to read rows staring from the `offset` on +// the input relation. +message Offset { + // (Required) Input relation for an Offset. + Relation input = 1; + + // (Required) the limit. + int32 offset = 2; +} + +// Relation of type [[Tail]] that is used to fetch `limit` rows from the last of the input relation. +message Tail { + // (Required) Input relation for an Tail. + Relation input = 1; + + // (Required) the limit. + int32 limit = 2; +} + +// Relation of type [[Aggregate]]. +message Aggregate { + // (Required) Input relation for a RelationalGroupedDataset. + Relation input = 1; + + // (Required) How the RelationalGroupedDataset was built. + GroupType group_type = 2; + + // (Required) Expressions for grouping keys + repeated Expression grouping_expressions = 3; + + // (Required) List of values that will be translated to columns in the output DataFrame. + repeated Expression aggregate_expressions = 4; + + // (Optional) Pivots a column of the current `DataFrame` and performs the specified aggregation. + Pivot pivot = 5; + + // (Optional) List of values that will be translated to columns in the output DataFrame. + repeated GroupingSets grouping_sets = 6; + + enum GroupType { + GROUP_TYPE_UNSPECIFIED = 0; + GROUP_TYPE_GROUPBY = 1; + GROUP_TYPE_ROLLUP = 2; + GROUP_TYPE_CUBE = 3; + GROUP_TYPE_PIVOT = 4; + GROUP_TYPE_GROUPING_SETS = 5; + } + + message Pivot { + // (Required) The column to pivot + Expression col = 1; + + // (Optional) List of values that will be translated to columns in the output DataFrame. + // + // Note that if it is empty, the server side will immediately trigger a job to collect + // the distinct values of the column. 
+ repeated Expression.Literal values = 2; + } + + message GroupingSets { + // (Required) Individual grouping set + repeated Expression grouping_set = 1; + } +} + +// Relation of type [[Sort]]. +message Sort { + // (Required) Input relation for a Sort. + Relation input = 1; + + // (Required) The ordering expressions + repeated Expression.SortOrder order = 2; + + // (Optional) if this is a global sort. + optional bool is_global = 3; +} + + +// Drop specified columns. +message Drop { + // (Required) The input relation. + Relation input = 1; + + // (Optional) columns to drop. + repeated Expression columns = 2; + + // (Optional) names of columns to drop. + repeated string column_names = 3; +} + + +// Relation of type [[Deduplicate]] which have duplicate rows removed, could consider either only +// the subset of columns or all the columns. +message Deduplicate { + // (Required) Input relation for a Deduplicate. + Relation input = 1; + + // (Optional) Deduplicate based on a list of column names. + // + // This field does not co-use with `all_columns_as_keys`. + repeated string column_names = 2; + + // (Optional) Deduplicate based on all the columns of the input relation. + // + // This field does not co-use with `column_names`. + optional bool all_columns_as_keys = 3; + + // (Optional) Deduplicate within the time range of watermark. + optional bool within_watermark = 4; +} + +// A relation that does not need to be qualified by name. +message LocalRelation { + // (Optional) Local collection data serialized into Arrow IPC streaming format which contains + // the schema of the data. + optional bytes data = 1; + + // (Optional) The schema of local data. + // It should be either a DDL-formatted type string or a JSON string. + // + // The server side will update the column names and data types according to this schema. + // If the 'data' is not provided, then this schema will be required. + optional string schema = 2; +} + +// A local relation that has been cached already. +message CachedLocalRelation { + // `userId` and `sessionId` fields are deleted since the server must always use the active + // session/user rather than arbitrary values provided by the client. It is never valid to access + // a local relation from a different session/user. + reserved 1, 2; + reserved "userId", "sessionId"; + + // (Required) A sha-256 hash of the serialized local relation in proto, see LocalRelation. + string hash = 3; +} + +// Represents a remote relation that has been cached on server. +message CachedRemoteRelation { + // (Required) ID of the remote related (assigned by the service). + string relation_id = 1; +} + +// Relation of type [[Sample]] that samples a fraction of the dataset. +message Sample { + // (Required) Input relation for a Sample. + Relation input = 1; + + // (Required) lower bound. + double lower_bound = 2; + + // (Required) upper bound. + double upper_bound = 3; + + // (Optional) Whether to sample with replacement. + optional bool with_replacement = 4; + + // (Required) The random seed. + // This field is required to avoid generating mutable dataframes (see SPARK-48184 for details), + // however, still keep it 'optional' here for backward compatibility. + optional int64 seed = 5; + + // (Required) Explicitly sort the underlying plan to make the ordering deterministic or cache it. + // This flag is true when invoking `dataframe.randomSplit` to randomly splits DataFrame with the + // provided weights. Otherwise, it is false. 
+ bool deterministic_order = 6; +} + +// Relation of type [[Range]] that generates a sequence of integers. +message Range { + // (Optional) Default value = 0 + optional int64 start = 1; + + // (Required) + int64 end = 2; + + // (Required) + int64 step = 3; + + // Optional. Default value is assigned by 1) SQL conf "spark.sql.leafNodeDefaultParallelism" if + // it is set, or 2) spark default parallelism. + optional int32 num_partitions = 4; +} + +// Relation alias. +message SubqueryAlias { + // (Required) The input relation of SubqueryAlias. + Relation input = 1; + + // (Required) The alias. + string alias = 2; + + // (Optional) Qualifier of the alias. + repeated string qualifier = 3; +} + +// Relation repartition. +message Repartition { + // (Required) The input relation of Repartition. + Relation input = 1; + + // (Required) Must be positive. + int32 num_partitions = 2; + + // (Optional) Default value is false. + optional bool shuffle = 3; +} + +// Compose the string representing rows for output. +// It will invoke 'Dataset.showString' to compute the results. +message ShowString { + // (Required) The input relation. + Relation input = 1; + + // (Required) Number of rows to show. + int32 num_rows = 2; + + // (Required) If set to more than 0, truncates strings to + // `truncate` characters and all cells will be aligned right. + int32 truncate = 3; + + // (Required) If set to true, prints output rows vertically (one line per column value). + bool vertical = 4; +} + +// Compose the string representing rows for output. +// It will invoke 'Dataset.htmlString' to compute the results. +message HtmlString { + // (Required) The input relation. + Relation input = 1; + + // (Required) Number of rows to show. + int32 num_rows = 2; + + // (Required) If set to more than 0, truncates strings to + // `truncate` characters and all cells will be aligned right. + int32 truncate = 3; +} + +// Computes specified statistics for numeric and string columns. +// It will invoke 'Dataset.summary' (same as 'StatFunctions.summary') +// to compute the results. +message StatSummary { + // (Required) The input relation. + Relation input = 1; + + // (Optional) Statistics from to be computed. + // + // Available statistics are: + // count + // mean + // stddev + // min + // max + // arbitrary approximate percentiles specified as a percentage (e.g. 75%) + // count_distinct + // approx_count_distinct + // + // If no statistics are given, this function computes 'count', 'mean', 'stddev', 'min', + // 'approximate quartiles' (percentiles at 25%, 50%, and 75%), and 'max'. + repeated string statistics = 2; +} + +// Computes basic statistics for numeric and string columns, including count, mean, stddev, min, +// and max. If no columns are given, this function computes statistics for all numerical or +// string columns. +message StatDescribe { + // (Required) The input relation. + Relation input = 1; + + // (Optional) Columns to compute statistics on. + repeated string cols = 2; +} + +// Computes a pair-wise frequency table of the given columns. Also known as a contingency table. +// It will invoke 'Dataset.stat.crosstab' (same as 'StatFunctions.crossTabulate') +// to compute the results. +message StatCrosstab { + // (Required) The input relation. + Relation input = 1; + + // (Required) The name of the first column. + // + // Distinct items will make the first item of each row. + string col1 = 2; + + // (Required) The name of the second column. + // + // Distinct items will make the column names of the DataFrame. 
+ string col2 = 3; +} + +// Calculate the sample covariance of two numerical columns of a DataFrame. +// It will invoke 'Dataset.stat.cov' (same as 'StatFunctions.calculateCov') to compute the results. +message StatCov { + // (Required) The input relation. + Relation input = 1; + + // (Required) The name of the first column. + string col1 = 2; + + // (Required) The name of the second column. + string col2 = 3; +} + +// Calculates the correlation of two columns of a DataFrame. Currently only supports the Pearson +// Correlation Coefficient. It will invoke 'Dataset.stat.corr' (same as +// 'StatFunctions.pearsonCorrelation') to compute the results. +message StatCorr { + // (Required) The input relation. + Relation input = 1; + + // (Required) The name of the first column. + string col1 = 2; + + // (Required) The name of the second column. + string col2 = 3; + + // (Optional) Default value is 'pearson'. + // + // Currently only supports the Pearson Correlation Coefficient. + optional string method = 4; +} + +// Calculates the approximate quantiles of numerical columns of a DataFrame. +// It will invoke 'Dataset.stat.approxQuantile' (same as 'StatFunctions.approxQuantile') +// to compute the results. +message StatApproxQuantile { + // (Required) The input relation. + Relation input = 1; + + // (Required) The names of the numerical columns. + repeated string cols = 2; + + // (Required) A list of quantile probabilities. + // + // Each number must belong to [0, 1]. + // For example 0 is the minimum, 0.5 is the median, 1 is the maximum. + repeated double probabilities = 3; + + // (Required) The relative target precision to achieve (greater than or equal to 0). + // + // If set to zero, the exact quantiles are computed, which could be very expensive. + // Note that values greater than 1 are accepted but give the same result as 1. + double relative_error = 4; +} + +// Finding frequent items for columns, possibly with false positives. +// It will invoke 'Dataset.stat.freqItems' (same as 'StatFunctions.freqItems') +// to compute the results. +message StatFreqItems { + // (Required) The input relation. + Relation input = 1; + + // (Required) The names of the columns to search frequent items in. + repeated string cols = 2; + + // (Optional) The minimum frequency for an item to be considered `frequent`. + // Should be greater than 1e-4. + optional double support = 3; +} + + +// Returns a stratified sample without replacement based on the fraction +// given on each stratum. +// It will invoke 'Dataset.stat.freqItems' (same as 'StatFunctions.freqItems') +// to compute the results. +message StatSampleBy { + // (Required) The input relation. + Relation input = 1; + + // (Required) The column that defines strata. + Expression col = 2; + + // (Required) Sampling fraction for each stratum. + // + // If a stratum is not specified, we treat its fraction as zero. + repeated Fraction fractions = 3; + + // (Required) The random seed. + // This field is required to avoid generating mutable dataframes (see SPARK-48184 for details), + // however, still keep it 'optional' here for backward compatibility. + optional int64 seed = 5; + + message Fraction { + // (Required) The stratum. + Expression.Literal stratum = 1; + + // (Required) The fraction value. Must be in [0, 1]. + double fraction = 2; + } +} + + +// Replaces null values. +// It will invoke 'Dataset.na.fill' (same as 'DataFrameNaFunctions.fill') to compute the results. 
+// Following 3 parameter combinations are supported:
+//   1, 'values' only contains 1 item, 'cols' is empty:
+//     replaces null values in all type-compatible columns.
+//   2, 'values' only contains 1 item, 'cols' is not empty:
+//     replaces null values in specified columns.
+//   3, 'values' contains more than 1 items, then 'cols' is required to have the same length:
+//     replaces each specified column with corresponding value.
+message NAFill {
+  // (Required) The input relation.
+  Relation input = 1;
+
+  // (Optional) Optional list of column names to consider.
+  repeated string cols = 2;
+
+  // (Required) Values to replace null values with.
+  //
+  // Should contain at least 1 item.
+  // Only 4 data types are supported now: bool, long, double, string
+  repeated Expression.Literal values = 3;
+}
+
+
+// Drop rows containing null values.
+// It will invoke 'Dataset.na.drop' (same as 'DataFrameNaFunctions.drop') to compute the results.
+message NADrop {
+  // (Required) The input relation.
+  Relation input = 1;
+
+  // (Optional) Optional list of column names to consider.
+  //
+  // When it is empty, all the columns in the input relation will be considered.
+  repeated string cols = 2;
+
+  // (Optional) The minimum number of non-null and non-NaN values required to keep.
+  //
+  // When not set, it is equivalent to the number of considered columns, which means
+  // a row will be kept only if all columns are non-null.
+  //
+  // 'how' options ('all', 'any') can be easily converted to this field:
+  //   - 'all' -> set 'min_non_nulls' 1;
+  //   - 'any' -> keep 'min_non_nulls' unset;
+  optional int32 min_non_nulls = 3;
+}
+
+
+// Replaces old values with the corresponding values.
+// It will invoke 'Dataset.na.replace' (same as 'DataFrameNaFunctions.replace')
+// to compute the results.
+message NAReplace {
+  // (Required) The input relation.
+  Relation input = 1;
+
+  // (Optional) List of column names to consider.
+  //
+  // When it is empty, all the type-compatible columns in the input relation will be considered.
+  repeated string cols = 2;
+
+  // (Optional) The value replacement mapping.
+  repeated Replacement replacements = 3;
+
+  message Replacement {
+    // (Required) The old value.
+    //
+    // Only 4 data types are supported now: null, bool, double, string.
+    Expression.Literal old_value = 1;
+
+    // (Required) The new value.
+    //
+    // Should be of the same data type with the old value.
+    Expression.Literal new_value = 2;
+  }
+}
+
+
+// Rename columns on the input relation by the same length of names.
+message ToDF {
+  // (Required) The input relation of RenameColumnsBySameLengthNames.
+  Relation input = 1;
+
+  // (Required)
+  //
+  // The number of columns of the input relation must be equal to the length
+  // of this field. If this is not true, an exception will be returned.
+  repeated string column_names = 2;
+}
+
+
+// Rename columns on the input relation by a map with name to name mapping.
+message WithColumnsRenamed {
+  // (Required) The input relation.
+  Relation input = 1;
+
+
+  // (Optional)
+  //
+  // Renaming column names of input relation from A to B where A is the map key
+  // and B is the map value. This is a no-op if schema doesn't contain any A. It
+  // does not require that all input relation column names to present as keys.
+  // duplicated B are not allowed.
+  map<string, string> rename_columns_map = 2 [deprecated=true];
+
+  repeated Rename renames = 3;
+
+  message Rename {
+    // (Required) The existing column name.
+    string col_name = 1;
+
+    // (Required) The new column name.
+ string new_col_name = 2; + } +} + +// Adding columns or replacing the existing columns that have the same names. +message WithColumns { + // (Required) The input relation. + Relation input = 1; + + // (Required) + // + // Given a column name, apply the corresponding expression on the column. If column + // name exists in the input relation, then replace the column. If the column name + // does not exist in the input relation, then adds it as a new column. + // + // Only one name part is expected from each Expression.Alias. + // + // An exception is thrown when duplicated names are present in the mapping. + repeated Expression.Alias aliases = 2; +} + +message WithWatermark { + + // (Required) The input relation + Relation input = 1; + + // (Required) Name of the column containing event time. + string event_time = 2; + + // (Required) + string delay_threshold = 3; +} + +// Specify a hint over a relation. Hint should have a name and optional parameters. +message Hint { + // (Required) The input relation. + Relation input = 1; + + // (Required) Hint name. + // + // Supported Join hints include BROADCAST, MERGE, SHUFFLE_HASH, SHUFFLE_REPLICATE_NL. + // + // Supported partitioning hints include COALESCE, REPARTITION, REPARTITION_BY_RANGE. + string name = 2; + + // (Optional) Hint parameters. + repeated Expression parameters = 3; +} + +// Unpivot a DataFrame from wide format to long format, optionally leaving identifier columns set. +message Unpivot { + // (Required) The input relation. + Relation input = 1; + + // (Required) Id columns. + repeated Expression ids = 2; + + // (Optional) Value columns to unpivot. + optional Values values = 3; + + // (Required) Name of the variable column. + string variable_column_name = 4; + + // (Required) Name of the value column. + string value_column_name = 5; + + message Values { + repeated Expression values = 1; + } +} + +// Transpose a DataFrame, switching rows to columns. +// Transforms the DataFrame such that the values in the specified index column +// become the new columns of the DataFrame. +message Transpose { + // (Required) The input relation. + Relation input = 1; + + // (Optional) A list of columns that will be treated as the indices. + // Only single column is supported now. + repeated Expression index_columns = 2; +} + +message ToSchema { + // (Required) The input relation. + Relation input = 1; + + // (Required) The user provided schema. + // + // The Sever side will update the dataframe with this schema. + DataType schema = 2; +} + +message RepartitionByExpression { + // (Required) The input relation. + Relation input = 1; + + // (Required) The partitioning expressions. + repeated Expression partition_exprs = 2; + + // (Optional) number of partitions, must be positive. + optional int32 num_partitions = 3; +} + +message MapPartitions { + // (Required) Input relation for a mapPartitions-equivalent API: mapInPandas, mapInArrow. + Relation input = 1; + + // (Required) Input user-defined function. + CommonInlineUserDefinedFunction func = 2; + + // (Optional) Whether to use barrier mode execution or not. + optional bool is_barrier = 3; + + // (Optional) ResourceProfile id used for the stage level scheduling. + optional int32 profile_id = 4; +} + +message GroupMap { + // (Required) Input relation for Group Map API: apply, applyInPandas. + Relation input = 1; + + // (Required) Expressions for grouping keys. + repeated Expression grouping_expressions = 2; + + // (Required) Input user-defined function. 
+ CommonInlineUserDefinedFunction func = 3; + + // (Optional) Expressions for sorting. Only used by Scala Sorted Group Map API. + repeated Expression sorting_expressions = 4; + + // Below fields are only used by (Flat)MapGroupsWithState + // (Optional) Input relation for initial State. + Relation initial_input = 5; + + // (Optional) Expressions for grouping keys of the initial state input relation. + repeated Expression initial_grouping_expressions = 6; + + // (Optional) True if MapGroupsWithState, false if FlatMapGroupsWithState. + optional bool is_map_groups_with_state = 7; + + // (Optional) The output mode of the function. + optional string output_mode = 8; + + // (Optional) Timeout configuration for groups that do not receive data for a while. + optional string timeout_conf = 9; +} + +message CoGroupMap { + // (Required) One input relation for CoGroup Map API - applyInPandas. + Relation input = 1; + + // Expressions for grouping keys of the first input relation. + repeated Expression input_grouping_expressions = 2; + + // (Required) The other input relation. + Relation other = 3; + + // Expressions for grouping keys of the other input relation. + repeated Expression other_grouping_expressions = 4; + + // (Required) Input user-defined function. + CommonInlineUserDefinedFunction func = 5; + + // (Optional) Expressions for sorting. Only used by Scala Sorted CoGroup Map API. + repeated Expression input_sorting_expressions = 6; + + // (Optional) Expressions for sorting. Only used by Scala Sorted CoGroup Map API. + repeated Expression other_sorting_expressions = 7; +} + +message ApplyInPandasWithState { + // (Required) Input relation for applyInPandasWithState. + Relation input = 1; + + // (Required) Expressions for grouping keys. + repeated Expression grouping_expressions = 2; + + // (Required) Input user-defined function. + CommonInlineUserDefinedFunction func = 3; + + // (Required) Schema for the output DataFrame. + string output_schema = 4; + + // (Required) Schema for the state. + string state_schema = 5; + + // (Required) The output mode of the function. + string output_mode = 6; + + // (Required) Timeout configuration for groups that do not receive data for a while. + string timeout_conf = 7; +} + +message CommonInlineUserDefinedTableFunction { + // (Required) Name of the user-defined table function. + string function_name = 1; + + // (Optional) Whether the user-defined table function is deterministic. + bool deterministic = 2; + + // (Optional) Function input arguments. Empty arguments are allowed. + repeated Expression arguments = 3; + + // (Required) Type of the user-defined table function. + oneof function { + PythonUDTF python_udtf = 4; + } +} + +message PythonUDTF { + // (Optional) Return type of the Python UDTF. + optional DataType return_type = 1; + + // (Required) EvalType of the Python UDTF. + int32 eval_type = 2; + + // (Required) The encoded commands of the Python UDTF. + bytes command = 3; + + // (Required) Python version being used in the client. + string python_ver = 4; +} + +message CommonInlineUserDefinedDataSource { + // (Required) Name of the data source. + string name = 1; + + // (Required) The data source type. + oneof data_source { + PythonDataSource python_data_source = 2; + } +} + +message PythonDataSource { + // (Required) The encoded commands of the Python data source. + bytes command = 1; + + // (Required) Python version being used in the client. + string python_ver = 2; +} + +// Collect arbitrary (named) metrics from a dataset. 
+message CollectMetrics {
+  // (Required) The input relation.
+  Relation input = 1;
+
+  // (Required) Name of the metrics.
+  string name = 2;
+
+  // (Required) The metric sequence.
+  repeated Expression metrics = 3;
+}
+
+message Parse {
+  // (Required) Input relation to Parse. The input is expected to have single text column.
+  Relation input = 1;
+  // (Required) The expected format of the text.
+  ParseFormat format = 2;
+
+  // (Optional) DataType representing the schema. If not set, Spark will infer the schema.
+  optional DataType schema = 3;
+
+  // Options for the csv/json parser. The map key is case insensitive.
+  map<string, string> options = 4;
+  enum ParseFormat {
+    PARSE_FORMAT_UNSPECIFIED = 0;
+    PARSE_FORMAT_CSV = 1;
+    PARSE_FORMAT_JSON = 2;
+  }
+}
+
+// Relation of type [[AsOfJoin]].
+//
+// `left` and `right` must be present.
+message AsOfJoin {
+  // (Required) Left input relation for a Join.
+  Relation left = 1;
+
+  // (Required) Right input relation for a Join.
+  Relation right = 2;
+
+  // (Required) Field to join on in left DataFrame
+  Expression left_as_of = 3;
+
+  // (Required) Field to join on in right DataFrame
+  Expression right_as_of = 4;
+
+  // (Optional) The join condition. Could be unset when `using_columns` is utilized.
+  //
+  // This field does not co-exist with using_columns.
+  Expression join_expr = 5;
+
+  // Optional. using_columns provides a list of columns that should present on both sides of
+  // the join inputs that this Join will join on. For example A JOIN B USING col_name is
+  // equivalent to A JOIN B on A.col_name = B.col_name.
+  //
+  // This field does not co-exist with join_condition.
+  repeated string using_columns = 6;
+
+  // (Required) The join type.
+  string join_type = 7;
+
+  // (Optional) The asof tolerance within this range.
+  Expression tolerance = 8;
+
+  // (Required) Whether allow matching with the same value or not.
+  bool allow_exact_matches = 9;
+
+  // (Required) Whether to search for prior, subsequent, or closest matches.
+  string direction = 10;
+}
diff --git a/src/scripts/spark-connect-generation/proto/spark/connect/types.proto b/src/scripts/spark-connect-generation/proto/spark/connect/types.proto
new file mode 100644
index 0000000000..4f768f2015
--- /dev/null
+++ b/src/scripts/spark-connect-generation/proto/spark/connect/types.proto
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = 'proto3';
+
+package spark.connect;
+
+option java_multiple_files = true;
+option java_package = "org.apache.spark.connect.proto";
+option go_package = "internal/generated";
+
+// This message describes the logical [[DataType]] of something. It does not carry the value
+// itself but only describes it.
+message DataType { + oneof kind { + NULL null = 1; + + Binary binary = 2; + + Boolean boolean = 3; + + // Numeric types + Byte byte = 4; + Short short = 5; + Integer integer = 6; + Long long = 7; + + Float float = 8; + Double double = 9; + Decimal decimal = 10; + + // String types + String string = 11; + Char char = 12; + VarChar var_char = 13; + + // Datatime types + Date date = 14; + Timestamp timestamp = 15; + TimestampNTZ timestamp_ntz = 16; + + // Interval types + CalendarInterval calendar_interval = 17; + YearMonthInterval year_month_interval = 18; + DayTimeInterval day_time_interval = 19; + + // Complex types + Array array = 20; + Struct struct = 21; + Map map = 22; + Variant variant = 25; + + // UserDefinedType + UDT udt = 23; + + // UnparsedDataType + Unparsed unparsed = 24; + } + + message Boolean { + uint32 type_variation_reference = 1; + } + + message Byte { + uint32 type_variation_reference = 1; + } + + message Short { + uint32 type_variation_reference = 1; + } + + message Integer { + uint32 type_variation_reference = 1; + } + + message Long { + uint32 type_variation_reference = 1; + } + + message Float { + uint32 type_variation_reference = 1; + } + + message Double { + uint32 type_variation_reference = 1; + } + + message String { + uint32 type_variation_reference = 1; + string collation = 2; + } + + message Binary { + uint32 type_variation_reference = 1; + } + + message NULL { + uint32 type_variation_reference = 1; + } + + message Timestamp { + uint32 type_variation_reference = 1; + } + + message Date { + uint32 type_variation_reference = 1; + } + + message TimestampNTZ { + uint32 type_variation_reference = 1; + } + + message CalendarInterval { + uint32 type_variation_reference = 1; + } + + message YearMonthInterval { + optional int32 start_field = 1; + optional int32 end_field = 2; + uint32 type_variation_reference = 3; + } + + message DayTimeInterval { + optional int32 start_field = 1; + optional int32 end_field = 2; + uint32 type_variation_reference = 3; + } + + // Start compound types. 
+ message Char { + int32 length = 1; + uint32 type_variation_reference = 2; + } + + message VarChar { + int32 length = 1; + uint32 type_variation_reference = 2; + } + + message Decimal { + optional int32 scale = 1; + optional int32 precision = 2; + uint32 type_variation_reference = 3; + } + + message StructField { + string name = 1; + DataType data_type = 2; + bool nullable = 3; + optional string metadata = 4; + } + + message Struct { + repeated StructField fields = 1; + uint32 type_variation_reference = 2; + } + + message Array { + DataType element_type = 1; + bool contains_null = 2; + uint32 type_variation_reference = 3; + } + + message Map { + DataType key_type = 1; + DataType value_type = 2; + bool value_contains_null = 3; + uint32 type_variation_reference = 4; + } + + message Variant { + uint32 type_variation_reference = 1; + } + + message UDT { + string type = 1; + optional string jvm_class = 2; + optional string python_class = 3; + optional string serialized_python_class = 4; + DataType sql_type = 5; + } + + message Unparsed { + // (Required) The unparsed data type string + string data_type_string = 1; + } +} diff --git a/src/scripts/spark-connect-generation/src/main.rs b/src/scripts/spark-connect-generation/src/main.rs new file mode 100644 index 0000000000..4940ae2214 --- /dev/null +++ b/src/scripts/spark-connect-generation/src/main.rs @@ -0,0 +1,9 @@ +/// Based on definition of [`tonic::include_proto`] +const GENERATED_CODE: &str = include_str!(concat!( + env!("OUT_DIR"), + concat!("/", "spark.connect", ".rs") +)); + +fn main() { + println!("{GENERATED_CODE}"); +}
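
The diff does not show the crate's build script, but main.rs above expects prost/tonic-generated code at $OUT_DIR/spark.connect.rs. A minimal build.rs sketch that would produce such a file is given below; the proto list, the proto/ include path, and the use of prost_build are illustrative assumptions, not the PR's actual build script.

// build.rs -- hypothetical sketch, not part of this diff.
// Compiles the vendored Spark Connect protos into $OUT_DIR/spark.connect.rs,
// the file that main.rs embeds via include_str!(concat!(env!("OUT_DIR"), "/spark.connect.rs")).
fn main() -> Result<(), Box<dyn std::error::Error>> {
    prost_build::compile_protos(
        &[
            "proto/spark/connect/relations.proto",
            "proto/spark/connect/types.proto",
        ],
        // Include path; transitively imported protos (expressions.proto, etc.)
        // must also be present here for relations.proto to resolve its imports.
        &["proto/"],
    )?;
    Ok(())
}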
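
As a rough usage sketch, a downstream crate could embed the module printed by this script and work with the DataType message defined in types.proto above. The spark_connect module name, the generated/spark.connect.rs path, and the prost-style type names (data_type::Kind::Integer, etc.) are assumptions based on prost's default code generation, not APIs shown in this diff.

// Hypothetical consumer of the generated code -- assumes the script's output
// was saved as src/generated/spark.connect.rs in some downstream crate.
pub mod spark_connect {
    include!("generated/spark.connect.rs");
}

use prost::Message;

fn roundtrip_example() -> Result<(), prost::DecodeError> {
    // An integer DataType, per `message DataType { oneof kind { ... Integer integer = 6; ... } }`.
    let dt = spark_connect::DataType {
        kind: Some(spark_connect::data_type::Kind::Integer(
            spark_connect::data_type::Integer {
                type_variation_reference: 0,
            },
        )),
    };

    // Round-trip through the protobuf wire format.
    let bytes = dt.encode_to_vec();
    let decoded = spark_connect::DataType::decode(bytes.as_slice())?;
    assert_eq!(dt, decoded);
    Ok(())
}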