diff --git a/docs/conf.py b/docs/conf.py index d8756638..7a04f74e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -31,6 +31,8 @@ r"https://www.baeldung.com/.*", # 404 Client Error: Not Found r"https://github.com/crate-workbench/cratedb-toolkit/actions/runs/.*", + # 403 Client Error: Forbidden for url + r"https://www.datacamp.com/.*", ] # Configure intersphinx. diff --git a/docs/feature/index/index.md b/docs/feature/index/index.md index 70d33beb..bdbd0573 100644 --- a/docs/feature/index/index.md +++ b/docs/feature/index/index.md @@ -1,6 +1,5 @@ -(convergent-index)= - -# Convergent Index +(hybrid-indexes)= +# Hybrid Indexes :::{include} /_include/links.md ::: diff --git a/docs/feature/query/index.md b/docs/feature/query/index.md index 73d78a83..62001309 100644 --- a/docs/feature/query/index.md +++ b/docs/feature/query/index.md @@ -1,5 +1,6 @@ (query)= (querying)= +(advanced-querying)= # Advanced Querying :::{include} /_include/links.md diff --git a/docs/index.md b/docs/index.md index 9a1d64dd..9d6b01cf 100644 --- a/docs/index.md +++ b/docs/index.md @@ -312,6 +312,7 @@ performance/index domain/index integrate/index +migrate/index reference-architectures/index ``` diff --git a/docs/migrate/index.md b/docs/migrate/index.md new file mode 100644 index 00000000..120272f9 --- /dev/null +++ b/docs/migrate/index.md @@ -0,0 +1,43 @@ +(migrations)= +# Migrations + + + +Guidelines, tools, and ETL recipes, to support migrating analytic data +warehouse workloads to CrateDB. + +::::{grid} 1 2 2 2 +:margin: 4 4 0 0 +:padding: 0 +:gutter: 2 + +:::{grid-item-card} Welcome Rockset Developers +:link: rockset/index +:link-type: doc +:link-alt: Migrate from Rockset to CrateDB + +![image](https://github.com/user-attachments/assets/006547bd-e8d3-4573-af01-4dc31d88306b) + +CrateDB automatically indexes all your data in exactly the same way as +Rockset's Converged Index™, and also uses the SQL query language. 
+ +While Rockset is a proprietary product, CrateDB is truly open-source, so +vendor lock-in situations will be a thing of the past. ++++ +**What's inside:** +The Rockset HTTP API is sunsetting on September 30th, 2024. +Explore what CrateDB can do for you. +::: + +:::: + + +```{toctree} +:hidden: +Rockset <rockset/index> +``` diff --git a/docs/migrate/rockset/index.md b/docs/migrate/rockset/index.md new file mode 100644 index 00000000..a16b8ce4 --- /dev/null +++ b/docs/migrate/rockset/index.md @@ -0,0 +1,316 @@ +# Welcome Rockset Developers + + + +::::{grid} 1 1 2 2 +:padding: 0 + +:::{grid-item} +:columns: auto auto 8 8 +Because the [Rockset HTTP API is sunsetting on September 30th, 2024], +and CrateDB covers a reasonable number of features, this page +presents its capabilities to support your analytical workloads +and data consolidation efforts. + +Both Rockset and CrateDB use SQL, so there is no need for your teams to learn +a new query language or querying regime. + +When it comes to **real-time analytics and hybrid search**, +CrateDB is the only solution that offers a similar approach to converged indexing, +full-text search, vector search, and geospatial support in a single storage engine, +accessible via native SQL and HTTP endpoints. + +Because CrateDB is [truly open-source software] with a [clear commitment], +you will no longer find yourself +in vendor lock-in situations and service shutdowns due to M&A procedures and +similar occasions, being forced to migrate to a different system once again. + +As we work with more and more companies looking to migrate their workloads +from [Rockset] to [CrateDB], we have built expertise on the details of what +a [migration] entails. This page shares a few insights on this topic, and +why CrateDB is your go-to choice when selecting a cost-effective +replacement solution. 
+::: + +:::{grid-item-card} +:columns: auto auto 4 4 +:link: https://cratedb.com/resources/webinars/lp-wb-from-rockset-to-cratedb +:link-alt: "Webinar: From Rockset to CrateDB" +:class-header: sd-text-center sd-fs-5 sd-align-minor-center sd-font-weight-bold sd-text-capitalize +:class-body: text-smaller +:class-footer: text-smaller +{material-outlined}`cast_for_education;3.7em` + +Join our Webinar +^^^ +{material-outlined}`event_note;2.5em` Date +July 25th, 2024 + +{material-outlined}`schedule;2.5em` Time +8:00–8:45 am PST / +5:00–5:45 pm CET + +- Why CrateDB is a perfect \[Rockset\] replacement for real-time analytics and hybrid search. +- How CrateDB compares to \[Rockset\] and Elasticsearch/OpenSearch for streaming ingest. +- Why CrateDB is a cost-effective alternative to \[Rockset\]. ++++ +Register now to learn about our migration services, +and to have a live Q&A session with our experts. +::: + +:::: + + +## What's Inside + +:::{rubric} Introduction and Comparison +::: +An overview of CrateDB, and a side-by-side comparison, tuned for Rockset users. + +:::::{card} +:link: https://cratedb.com/blog/why-cratedb-is-a-perfect-rockset-replacement +:link-alt: "Why CrateDB is a perfect Rockset replacement" +:margin: 3 +:class-header: sd-font-weight-bold sd-fs-5 +:class-title: sd-fs-5 +{material-outlined}`swap_horiz;1.7em` Why CrateDB is a perfect Rockset replacement +^^^ +::::{grid} 2 +:::{grid-item} +:columns: 9 +Learn about the top five reasons for choosing CrateDB as a Rockset replacement. +1. Converged index a.k.a. automated indexing. +2. Fully-featured SQL and HTTP interface. +3. Support for structured, semi-structured, and unstructured data. +4. Support for real-time streaming and updates. +5. CrateDB is fully open source and deployment-agnostic. +::: +:::{grid-item} +:columns: 3 +{tags-primary}`CrateDB vs. 
Rockset` \ +{tags-info}`Features` \ +{tags-secondary}`Blog` +::: +:::: ++++ +A detailed comparison to help you in selecting the right alternative and ensuring a +timely and seamless migration. +::::: + +:::::{card} +:link: https://cratedb.com/blog/how-cratedb-compares-to-rockbench +:link-alt: "Performance Matters" +:margin: 3 +:class-header: sd-font-weight-bold sd-fs-5 +:class-title: sd-fs-5 +{material-outlined}`stacked_line_chart;1.7em` Performance Matters +^^^ +::::{grid} 2 +:::{grid-item} +:columns: 9 +Insights into how ingest performance compares between Rockset and CrateDB. + +We used the Rockbench benchmark to evaluate ingestion performance on throughput +and data latency. The results are impressive. + +- CrateDB outperforms Rockset on the same hardware while saving between + 20% and 60% on costs. +- CrateDB achieves 6-9x lower latencies than Rockset for streaming ingest. + When data volume increases, the latency increases + linearly in Rockset, while remaining mostly flat in CrateDB. +::: +:::{grid-item} +:columns: 3 +{tags-primary}`CrateDB vs. Rockset` \ +{tags-info}`Performance` \ +{tags-secondary}`Blog` +::: +:::: ++++ +A benchmark evaluation demonstrating CrateDB's ability to efficiently support +high-velocity data streams while it delivers more constant query latencies +than Rockset. +::::: + + +:::::{card} +:link: https://cratedb.com/blog/cost-efficient-rockset-alternative +:link-alt: "Cost Matters" +:margin: 3 +:class-header: sd-font-weight-bold sd-fs-5 +:class-title: sd-fs-5 +{material-outlined}`add_card;1.7em` Cost Matters +^^^ +::::{grid} 2 +:::{grid-item} +:columns: 9 + +Based on [Rockset's pricing examples], we combined the experience of real-world +customer projects as well as Rockbench benchmark runs, to compare the costs for +multiple scenarios. 
+ +Examples: A geospatial search application for logistics tracking, a recommendation engine, a backend +for in-app search and analytics, a backend for real-time game telemetry, +and an anomaly detection application. +::: +:::{grid-item} +:columns: 3 +{tags-primary}`CrateDB vs. Rockset` \ +{tags-info}`Costs` \ +{tags-secondary}`Blog` +::: +:::: ++++ +Results of comparing against Rockset's canonical example application scenarios +show that CrateDB Cloud is in a similar ballpark and oftentimes a bit cheaper +than solutions from other vendors. +::::: + + + +:::{rubric} Features and Benefits +::: +CrateDB's benefits and principles in a nutshell. + +:::::{grid} 1 1 2 2 +:gutter: 3 + +::::{grid-item-card} +:::{rubric} Fundamentals +::: +CrateDB is a scalable and cost-effective real-time analytics database, +combining complex JSON handling, time series, geospatial data, full-text +search, and vector search in one single storage engine. + +- [All features of CrateDB at a glance] +- [The CrateDB Documentation] + +:::{rubric} Foundation +::: +Built on top of Apache Lucene and Elasticsearch, CrateDB [automatically +indexes all your data] in exactly the same way as Rockset's [Converged +Index™], in order to achieve millisecond response times for any kind +of query and aggregation workloads. + +:::{rubric} Availability +::: +CrateDB is offered as a managed service available on AWS, Azure, and GCP, +and also as a fully open source edition. + +- No vendor lock-in and service sunsetting woes. +- No capacity limits. + +See [CrateDB Cloud] and [CrateDB Editions]. +:::: + +::::{grid-item-card} +:::{rubric} Interoperability, Open Source, Open Standards +::: +Rockset is a proprietary solution available as a managed service in AWS only. + +With CrateDB, you leverage open standards to avoid vendor lock-in and +future-proof your systems. 
+ +CrateDB's [open-source code base] accepts contributions from community +members and corporates, so you can easily add features you need, and +distribute them to downstream users in multiple ways, without any +licensing hassles. + +:::{rubric} Development and Embedding +::: +The attributes enumerated above allow you to integrate CrateDB into your +products seamlessly, and supply your engineers and continuous integration +systems with dedicated instances of CrateDB, instead of needing to run +them against a cloud-only service, which slows down development, +mostly due to API rate-limiting measures. +:::: + +::::: + + +## Learn +Learn how to migrate your database workloads from Rockset to CrateDB. + +:::::{grid} 1 1 2 2 +:gutter: 3 + +::::{grid-item-card} +:::{rubric} SQL Query Language +::: +CrateDB's lingua franca is SQL, ready for big data, very similar to +Rockset's SQL dialect. +- [CrateDB SQL] +- [Advanced Querying] +:::{rubric} Migrating queries from Rockset to CrateDB +::: +Because both Rockset and CrateDB use SQL, there is no need for your teams to +learn a new query language. There are a few differences in the SQL dialect, +where we provide relevant support information to make transitioning easier. +:::{toctree} +Migrate Queries <query> +::: +:::: + +::::{grid-item-card} +:::{rubric} Integrating data from other sources into CrateDB +::: +With Rockset, as a complete data warehouse system, integrating data from other +sources is a no-brainer, and provides exceptional UX/DX. + +CrateDB offers a wide range of integration capabilities, is compatible +with the PostgreSQL wire protocol, and offers adapter components to +integrate with applications and frameworks. 
+ +- [Ecosystem Catalog] +- [Integration Tutorials I] +- [Integration Tutorials II] +- [Software Development Kit] + +:::{rubric} Integrating CrateDB technologies into Rockset-based infrastructure +::: +The [Rockset HTTP API Adapter for CrateDB] is an experiment to provide +CrateDB's features through an API that is compatible with the Rockset HTTP +API, so client programs and libraries can work unmodified. + +It has been verified to work for the most basic API calls with plain +HTTP requests using curl or HTTPie, the Rockset CLI, and Java, JavaScript, +and Python example programs. +:::: + +::::: + + + +[Advanced Querying]: project:#advanced-querying +[All features of CrateDB at a glance]: project:#all-features +[Amazon DynamoDB Streams]: https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Streams.html +[Amazon Kinesis Data Streams]: https://aws.amazon.com/kinesis/ +[Apache/Confluent Kafka Streams]: https://kafka.apache.org/documentation/streams/ +[automatically indexes all your data]: project:#hybrid-indexes +[clear commitment]: https://cratedb.com/blog/opensource-licensing-founder +[Converged Index™]: https://rockset.com/blog/converged-indexing-the-secret-sauce-behind-rocksets-fast-queries/ +[CrateDB]: https://cratedb.com/database +[CrateDB Cloud]: https://cratedb.com/docs/cloud/ +[CrateDB Editions]: https://cratedb.com/database/editions +[CrateDB SQL]: project:#sql +[DX]: https://en.wikipedia.org/wiki/User_experience#Developer_experience +[Ecosystem Catalog]: https://cratedb.com/docs/crate/clients-tools/ +[Integration Tutorials I]: inv:#integrate +[Integration Tutorials II]: https://community.cratedb.com/t/overview-of-cratedb-integration-tutorials/1015 +[migration]: https://cratedb.com/migrations/rockset +[MongoDB Atlas Change Streams]: https://www.mongodb.com/docs/manual/changeStreams/ +[open-source code base]: https://github.com/crate/crate +[Rockset]: https://rockset.com/product/ +[Rockset's pricing examples]: 
https://docs.rockset.com/documentation/docs/billing#pricing-examples +[Rockset HTTP API Adapter for CrateDB]: https://cratedb-toolkit.readthedocs.io/adapter/rockset.html +[Rockset HTTP API is sunsetting on September 30th, 2024]: https://docs.rockset.com/documentation/docs/faq +[Software Development Kit]: https://cratedb-toolkit.readthedocs.io/ +[The CrateDB Documentation]: https://cratedb.com/docs/ +[truly open-source software]: https://cratedb.com/blog/cratedb-doubling-down-on-permissive-licensing-and-the-elasticsearch-lockdown +[UX]: https://en.wikipedia.org/wiki/User_experience +[Zero-ETL]: https://www.datacamp.com/blog/what-is-zero-etl diff --git a/docs/migrate/rockset/query.md b/docs/migrate/rockset/query.md new file mode 100644 index 00000000..d952fca3 --- /dev/null +++ b/docs/migrate/rockset/query.md @@ -0,0 +1,163 @@ +# Migrating queries from Rockset to CrateDB + +## Introduction +As we work with more and more companies looking to migrate their workloads from +[Rockset] to CrateDB, we have built expertise on the details of what a [migration] +entails. + +Both Rockset and CrateDB use SQL, so there is no need for your teams to +learn a new query language. There are, however, a few differences in the dialect. + +## First level columns +In Rockset, every record is entirely a JSON object, but in CrateDB, tables have +first level columns which themselves can be of type [OBJECT]. +If you are looking at migrating data from Rockset keeping changes to a minimum, +you can just define your tables with a single column of type `OBJECT`. +```sql +CREATE TABLE myschema.mytable ( + data OBJECT +); +``` + +## Accessing fields within objects +Rockset uses a dot notation for attribute access. +In CrateDB, a bracket notation is used instead, similar to what you would use in +Python for accessing dictionaries. 
+```sql +INSERT INTO myschema.mytable +VALUES('{"field1":1,"field2":"abc","field3":[1,2,3]}'); + +SELECT data['field2'] +FROM myschema.mytable; +``` + +## Inspecting inferred schemas +After you load some JSON data into your database, you may want to check the schema +CrateDB has inferred automatically. +In Rockset, you would do this with the `DESCRIBE` command; in CrateDB, you can use +`SHOW CREATE TABLE`. +```sql +SHOW CREATE TABLE myschema.mytable; +``` + +## Un-nesting arrays +Both Rockset and CrateDB provide an [UNNEST] function for un-nesting arrays. +In Rockset, however, you may have queries where `UNNEST` is used in a `CROSS JOIN`, +like in the [example with cars data in Rockset's documentation]. +To achieve the same results in CrateDB, you can use `UNNEST` in the list of +columns you are selecting instead. +```sql +SELECT data['make'], UNNEST(data['models']) +FROM companies; +``` + +## Functions equivalence +You will find that a large number of functions work exactly in the same way in +Rockset and CrateDB. There are, however, some functions that exist under +different names. + +We have compiled below a list of equivalences, and will come back and expand +this list as new functions come out in our work with prospects. + +If there is anything in your queries for which you do not see an equivalence, +do not hesitate to reach out as chances are CrateDB has the same functionality +under a different name, or there may be a simple workaround for your use cases. 
+ + +| Rockset function | CrateDB equivalent | +| --- | --- | +| ACOSH(x)| `LN(x + SQRT((x * x) - 1))` | +| APPROX_DISTINCT(x[, e])| `hyperloglog_distinct` | +| ARRAY_CONCAT(array1, array2, ...)| `array_cat` | +| ARRAY_CONTAINS(array, element)| `element = ANY (array)` | +| ARRAY_CREATE(val1, val2, ...)| `[val1, val2, ...]` or `_array(val1,val2, ...)` | +| ARRAY_DISTINCT(array)| `array_unique` | +| ARRAY_EXCEPT(array1, array2)| `array_unique(array_difference(array1, array2))` | +| ARRAY_FLATTEN(array)| `array_unnest` | +| ARRAY_JOIN(array, delimiter, nullReplacement)| [`array_to_string`] | +| ARRAY_MAP(function_name, array)| `(select array_agg(function_name(unnest)) from unnest(array))` | +| ARRAY_REMOVE(array, val)| `array_difference(array,[val])` | +| ARRAY_SHUFFLE(array)| `array(select unnest from unnest(array) ORDER BY random())` | +| ARRAY_SORT(array)| `array(select unnest from unnest(array) ORDER BY unnest)` | +| ARRAY_UNION(array1, array2)| `array_unique` | +| ASINH(x)| `LN(x + SQRT((x * x) + 1))` | +| ATANH(x)| `0.5*ln((1+x)/(1-x))` | +| BITWISE_AND(x, y)| `x & y` | +| BITWISE_OR(x, y)| `x \| y` | +| BITWISE_XOR(x, y)| `x # y` | +| BOOL_AND(x)| `val1 AND val2 AND ... ` | +| BOOL_OR(x)| `val1 OR val2 OR ...` | +| CARDINALITY(array)| `array_length(array,1)` | +| COUNT_IF(x)| `COUNT(*) FILTER (WHERE x)` | +| DATE_PARSE(string, format)| UDF: [`to_date`] | +| DAYS(n)| `'n DAYS'::INTERVAL` | +| EUCLIDEAN_DIST(array, array)| UDF: [`n_dimensional_distance`] | +| EVERY(x)| `val1 AND val2 AND ... 
` | +| FORMAT_DATE(format, date)| `date_format` | +| FORMAT_DATETIME(format, datetime)| `date_format` | +| FORMAT_TIME(format, time)| `date_format` | +| FORMAT_TIMESTAMP(format, timestamp[, timezone])| `date_format` | +| FROM_BASE64(s)| `decode(s, 'base64')` | +| FROM_HEX(s)| `decode(s, 'hex')` | +| HOURS(n)| `'n HOURS'::INTERVAL` | +| HYPOT(x, y)| `SQRT(x*x+y*y)` | +| IS DISTINCT FROM| `<>` | +| JSON_FORMAT(x)| `x::TEXT` | +| JSON_PARSE(x)| `x::OBJECT` | +| KEYS(obj)| `object_keys(obj)` | +| LOG(x)| `ln(x)` | +| LOG10(x)| `log(x,10)` | +| LOG2(x)| `log(x,2)` | +| MILLISECONDS(n)| `AGE(n::LONG,0)` | +| MINUTES(n)| `'n MINUTES'::INTERVAL` | +| MONTHS(n)| `'n MONTHS'::INTERVAL` | +| PARSE_DATE_ISO8601(string)| `date_trunc('day',string::TIMESTAMP)` | +| PARSE_DATETIME_ISO8601(string)| `string::TIMESTAMP` | +| POW(x, y)| `power(x,y)` | +| RAND()| `random()` | +| SEQUENCE(start, stop[, step])| `generate_series` | +| SIGN(x)| See [^sign] for CrateDB <5.8 | +| ST_ASTEXT(geography)| See [](#ST_ASTEXT) for `POLYGON`s | +| ST_GEOGFROMTEXT(well_known_text)| `well_known_text::geo_shape` | +| ST_GEOGPOINT(longitude, latitude)| `[longitude, latitude]::geo_point` | +| ST_INTERSECTS(geography_a, geography_b)| `intersects(geo_shape, geo_shape)` | +| ST_X(point)| `longitude(point)` | +| ST_Y(point)| `latitude(point)` | +| TIME_BUCKET(interval, timestamp[, origin])| `date_bin` | +| TIMESTAMP_SECONDS(n)| `(n::DOUBLE)::timestamp` | +| TO_BASE64(b)| `encode(b, 'base64')` | +| TO_HEX(b)| `encode(b, 'hex')` | +| TRUNCATE(x)| `trunc(x)` | +| TYPEOF(x)| `pg_typeof(x)` | +| YEARS(n)| `'n YEARS'::INTERVAL` | + +[`array_to_string`]: https://cratedb.com/docs/crate/reference/en/latest/general/builtins/scalar-functions.html#array-to-string-anyarray-separator-null-string +[`n_dimensional_distance`]: https://community.cratedb.com/t/user-defined-function-collection/773 +[`to_date`]: https://github.com/crate/crate/issues/9663#issuecomment-2178878930 + + +## Appendix + +(ST_ASTEXT)= +### ST_ASTEXT 
+CrateDB [user-defined function (UDF)] implementation of `ST_ASTEXT` function for polygons. +```text +CREATE FUNCTION ST_ASTEXT(geography geo_shape) + RETURNS TEXT + LANGUAGE JAVASCRIPT AS $$ + function st_astext(g) { + return 'POLYGON(' + g.coordinates.map(r => '(' + r.map(p => p.join(' ')).join(', ') + ')').join(', ') + ')'; + } + $$; +``` + + +[^sign]: The `SIGN` function has been added in CrateDB 5.8 - in older versions use `IF(x<0,-1,IF(x=0,0,1))`. + + +[example with cars data in Rockset's documentation]: https://docs.rockset.com/documentation/reference/select#unnest +[migration]: https://cratedb.com/migrations/rockset +[OBJECT]: https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#objects +[Rockset]: https://rockset.com/ +[UNNEST]: https://cratedb.com/docs/crate/reference/en/latest/general/builtins/table-functions.html#unnest-array-array +[user-defined function (UDF)]: https://cratedb-guide--53.org.readthedocs.build/feature/udf/ diff --git a/docs/requirements.txt b/docs/requirements.txt index 887df9c8..6d6bd2a4 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ -crate-docs-theme==0.34.0.dev2 +crate-docs-theme==0.34.0.dev3 # cache-buster-20240305 # Remark: Used for PyPI download cache busting on GHA.