From 06d89b6dda214a6caacc9db08355bd74843cc11c Mon Sep 17 00:00:00 2001 From: Jake Mulford Date: Tue, 29 Mar 2022 22:05:40 -0400 Subject: [PATCH 1/8] A notebooks --- .../A1 Create tables and fake data .md | 18 +++--- .../A2 Filter and decorate .md | 46 +++++++------- .../A3 Do time series and relational joins.md | 26 ++++---- .../A4 Group and aggregate .md | 62 +++++++++---------- .../A5 If-thens and conditions.md | 16 ++--- .../A. Real-time table ops/A6 Time methods.md | 42 ++++++------- 6 files changed, 103 insertions(+), 107 deletions(-) diff --git a/demo/web/src/main/notebooks/A. Real-time table ops/A1 Create tables and fake data .md b/demo/web/src/main/notebooks/A. Real-time table ops/A1 Create tables and fake data .md index 4251fccc7c7..e19c6a221f3 100644 --- a/demo/web/src/main/notebooks/A. Real-time table ops/A1 Create tables and fake data .md +++ b/demo/web/src/main/notebooks/A. Real-time table ops/A1 Create tables and fake data .md @@ -2,10 +2,10 @@ Throughout this demo notebook series, we show many of the ways to interact with real-time data in Deephaven. Here, we create some tables with fake data; in other notebooks, we show how to perform table operations on that data. Knowing how to create fake ticking tables is useful for familiarizing yourself with Deephaven, but also for working on proof of concepts without necessarily having a complete dataset. -`timeTable` is a great tool to simulate real-time data. We can use this and Python's `random` library to generate some fake data. +`time_table` is a great tool to simulate real-time data. We can use this and Python's `random` library to generate some fake data. 
```python -from deephaven.TableTools import timeTable +from deephaven2 import time_table import random import string @@ -17,12 +17,10 @@ def random_character(): def random_boolean(): return random.choice([True, False]) -table = timeTable("00:00:01").update("Number = (int)random_int()")\ - .update("Character = (String)random_character()")\ - .update("Boolean = (boolean)random_boolean()") +table = time_table("00:00:01").update(formulas=["Number = (int)(byte)random_int()", "Character = (String)random_character()", "Boolean = (boolean)random_boolean()"]) ``` -Let's wrap `timeTable` with a method and parameterize the time intervals and start times. This will allow us to reuse it throughout the notebooks. +Let's wrap `time_table` with a method and parameterize the time intervals and start times. This will allow us to reuse it throughout the notebooks. ```python def create_random_table(time_interval, start_time=None): @@ -38,13 +36,11 @@ def create_random_table(time_interval, start_time=None): """ table = None if start_time is None: - table = timeTable(time_interval) + table = time_table(time_interval) else: - table = timeTable(start_time, time_interval) + table = time_table(period=time_interval, start_time=start_time) - return table.update("Number = (int)random_int()")\ - .update("Character = (String)random_character()")\ - .update("Boolean = (boolean)random_boolean()") + return table.update(formulas=["Number = (int)(byte)random_int()", "Character = (String)random_character()", "Boolean = (boolean)random_boolean()"]) ``` We can use this method to create some tables with random data. diff --git a/demo/web/src/main/notebooks/A. Real-time table ops/A2 Filter and decorate .md b/demo/web/src/main/notebooks/A. Real-time table ops/A2 Filter and decorate .md index cbcf6684cf6..0362b428c0c 100644 --- a/demo/web/src/main/notebooks/A. Real-time table ops/A2 Filter and decorate .md +++ b/demo/web/src/main/notebooks/A. 
Real-time table ops/A2 Filter and decorate .md @@ -5,19 +5,19 @@ In this notebook, we show how to decorate and filter our data. Let's start by simulating measurements of our values every minute. This could represent something like stock prices, temperatures, etc. ```python -from deephaven.DateTimeUtils import currentTime, expressionToNanos, minus +from deephaven2.time import now, to_nanos, minus -time_interval = expressionToNanos("T1M") -offset = expressionToNanos("10D") -now = currentTime() +time_interval = to_nanos("00:01:00") +offset = to_nanos("240:00:00") +now_ = now() -daily_data = create_random_table(time_interval, start_time=minus(now, offset)) +daily_data = create_random_table(time_interval, start_time=minus(now_, offset)) ``` Now we decorate the data by adding its day of the week. ```python -daily_data = daily_data.update("DayOfWeekInt = dayOfWeek(Timestamp, TZ_NY)") +daily_data = daily_data.update(formulas=["DayOfWeekInt = dayOfWeek(Timestamp, TZ_NY)"]) ``` Next, we convert the day of week to a string representation. @@ -28,35 +28,35 @@ import calendar def day_of_week_int_to_str(day_of_week): return calendar.day_name[day_of_week-1] -daily_data = daily_data.update("DayOfWeekStr = day_of_week_int_to_str(DayOfWeekInt)") +daily_data = daily_data.update(formulas=["DayOfWeekStr = day_of_week_int_to_str(DayOfWeekInt)"]) ``` Deephaven provides a wealth of [filtering methods for tables](https://deephaven.io/core/docs/how-to-guides/use-filters/). We start by filtering using simple boolean expressions. 
```python -from deephaven.filter import or_ +evens = daily_data.where(filters=["Number % 2 == 0"]) +odds = daily_data.where(["!(Number % 2 == 0)"]) -evens = daily_data.where("Number % 2 == 0") -odds = daily_data.where("!(Number % 2 == 0)") +trues = daily_data.where(filters=["Boolean"]) +falses = daily_data.where(filters=["!Boolean"]) -trues = daily_data.where("Boolean") -falses = daily_data.where("!Boolean") - -evens_and_trues = daily_data.where("Number % 2 == 0", "Boolean") -odds_or_falses = daily_data.where(or_("!(Number % 2 == 0)", "!Boolean")) +evens_and_trues = daily_data.where(filters=["Number % 2 == 0", "Boolean"]) +odds_or_falses = daily_data.where_one_of(filters=["!(Number % 2 == 0)", "!Boolean"]) ``` Some filtering methods can apply a filter to one table based on another. ```python -from deephaven.TableTools import newTable, stringCol +from deephaven2 import new_table +from deephaven2.column import string_col + -vowels_table = newTable( - stringCol("Vowels", "A", "E", "I", "O", "U") -) +vowels_table = new_table([ + string_col("Vowels", ["A", "E", "I", "O", "U"]) +]) -vowels = daily_data.whereIn(vowels_table, "Character = Vowels") -consonants = daily_data.whereNotIn(vowels_table, "Character = Vowels") +vowels = daily_data.where_in(filter_table=vowels_table, cols=["Character = Vowels"]) +consonants = daily_data.where_not_in(filter_table=vowels_table, cols=["Character = Vowels"]) ``` You can also define custom functions to perform filtering. Here, we omit weekend days. @@ -65,8 +65,8 @@ You can also define custom functions to perform filtering. 
Here, we omit weekend def is_weekday(day_of_week): return day_of_week <= 5 #Weekdays are 1 through 5 -weekdays = daily_data.where("(boolean)is_weekday(DayOfWeekInt)") -weekends = daily_data.where("!((boolean)is_weekday(DayOfWeekInt))") +weekdays = daily_data.where(filters=["(boolean)is_weekday(DayOfWeekInt)"]) +weekends = daily_data.where(filters=["!((boolean)is_weekday(DayOfWeekInt))"]) ``` [The next notebook](A3%20Do%20time%20series%20and%20relational%20joins.md) will show how to perform joins on this data. diff --git a/demo/web/src/main/notebooks/A. Real-time table ops/A3 Do time series and relational joins.md b/demo/web/src/main/notebooks/A. Real-time table ops/A3 Do time series and relational joins.md index 7e70924476d..8b139ad1adc 100644 --- a/demo/web/src/main/notebooks/A. Real-time table ops/A3 Do time series and relational joins.md +++ b/demo/web/src/main/notebooks/A. Real-time table ops/A3 Do time series and relational joins.md @@ -5,15 +5,15 @@ In this notebook, we show how to perform joins with our time series data. Let's start again by simulating measurements of our values every minute, but this time using two tables with slightly different timestamps. This is great for a simulation because there's no guarantee that a real example will collect data with exact timestamp matches. 
```python -from deephaven.DateTimeUtils import currentTime, expressionToNanos, minus +from deephaven2.time import now, to_nanos, minus -time_interval = expressionToNanos("T1M") -offset_0 = expressionToNanos("10DT2S") -offset_1 = expressionToNanos("10D") -now = currentTime() +time_interval = to_nanos("00:01:00") +offset_0 = to_nanos("240:00:02") +offset_1 = to_nanos("240:00:00") +now_ = now() -daily_data_0 = create_random_table(time_interval, start_time=minus(now, offset_0)) -daily_data_1 = create_random_table(time_interval, start_time=minus(now, offset_1)) +daily_data_0 = create_random_table(time_interval, start_time=minus(now_, offset_0)) +daily_data_1 = create_random_table(time_interval, start_time=minus(now_, offset_1)) ``` To join these tables together based on the timestamps, we need to use an [as-of join, or `aj`](https://deephaven.io/core/docs/reference/table-operations/join/aj/). As-of joins perform exact matches across all given columns except for the last one, which instead matches based on the closest values. @@ -23,7 +23,7 @@ For an `aj`, the values in the right table are matched to the closest values in Let's join these tables using an `aj` to get a single table with all of our information. ```python -joined_data_aj = daily_data_0.aj(daily_data_1, "Timestamp", "Number1 = Number, Character1 = Character, Boolean1 = Boolean") +joined_data_aj = daily_data_0.aj(table=daily_data_1, on=["Timestamp"], joins=["Number1 = Number", "Character1 = Character", "Boolean1 = Boolean"]) ``` Deephaven supports another type of as of join, an `raj`. For a `raj`, the values in the right table are matched to the closest values in the left table without going under the left value. For example, if the right table contains a value `5` and the left table contains values `4` and `6`, the right table's `5` will be matched on the left table's `4`. @@ -31,18 +31,18 @@ Deephaven supports another type of as of join, an `raj`. 
For a `raj`, the values Let's also join these tables using a `raj`. ```python -joined_data_raj = daily_data_0.raj(daily_data_1, "Timestamp", "Number1 = Number, Character1 = Character, Boolean1 = Boolean") +joined_data_raj = daily_data_0.raj(table=daily_data_1, on=["Timestamp"], joins=["Number1 = Number", "Character1 = Character", "Boolean1 = Boolean"]) ``` As of joins work very well with time-tables that sample at different frequencies. Let's create two new tables, one that samples every second and one that samples every ten seconds, and show what happesn when we join them together using `aj` and `raj`. ```python -time_interval_0 = expressionToNanos("T1S") -time_interval_1 = expressionToNanos("T10S") +time_interval_0 = to_nanos("00:00:01") +time_interval_1 = to_nanos("00:00:10") sample_data_0 = create_random_table(time_interval_0) sample_data_1 = create_random_table(time_interval_1) -sample_data_aj = sample_data_0.aj(sample_data_1, "Timestamp", "Number1 = Number, Character1 = Character, Boolean1 = Boolean") -sample_data_raj = sample_data_0.raj(sample_data_1, "Timestamp", "Number1 = Number, Character1 = Character, Boolean1 = Boolean") +sample_data_aj = sample_data_0.aj(table=sample_data_1, on=["Timestamp"], joins=["Number1 = Number", "Character1 = Character", "Boolean1 = Boolean"]) +sample_data_raj = sample_data_0.raj(table=sample_data_1, on=["Timestamp"], joins=["Number1 = Number", "Character1 = Character", "Boolean1 = Boolean"]) ``` diff --git a/demo/web/src/main/notebooks/A. Real-time table ops/A4 Group and aggregate .md b/demo/web/src/main/notebooks/A. Real-time table ops/A4 Group and aggregate .md index 21e08a7588d..028384360d1 100644 --- a/demo/web/src/main/notebooks/A. Real-time table ops/A4 Group and aggregate .md +++ b/demo/web/src/main/notebooks/A. Real-time table ops/A4 Group and aggregate .md @@ -5,21 +5,21 @@ In this notebook, we show how to perform groupings and aggregations with our tim Let's start again by simulating some real-time data. 
Here, we'll use a shorter time interval. ```python -from deephaven.DateTimeUtils import currentTime, expressionToNanos, minus +from deephaven2.time import now, to_nanos, minus -time_interval = expressionToNanos("T1S") -offset = expressionToNanos("T60S") -now = currentTime() +time_interval = to_nanos("00:00:01") +offset = to_nanos("00:01:00") +now_ = now() -daily_data = create_random_table(time_interval, start_time=minus(now, offset)) +daily_data = create_random_table(time_interval, start_time=minus(now_, offset)) ``` Let's group this data a few different ways based on the columns we have. ```python -group_by_character = daily_data.groupBy("Character") -group_by_boolean = daily_data.groupBy("Boolean") -group_by_odd_even = daily_data.update("IsEven = (Number % 2 == 0)").groupBy("IsEven").dropColumns("IsEven") +group_by_character = daily_data.group_by(by=["Character"]) +group_by_boolean = daily_data.group_by(by=["Boolean"]) +group_by_odd_even = daily_data.update(formulas=["IsEven = (Number % 2 == 0)"]).group_by(by=["IsEven"]).drop_columns(["IsEven"]) ``` Just like all other real-time operations in Deephaven, these groupings update in real time as the source table updates. @@ -27,36 +27,36 @@ Just like all other real-time operations in Deephaven, these groupings update in Aggregations allow you to perform operations on grouped data. The query below computes statistics on our data. 
```python -from deephaven import Aggregation as agg, as_list +from deephaven2 import agg -average_agg = as_list([ - agg.AggAvg("Number") -]) +average_agg = [ + agg.avg(["Number"]) +] -total_average = daily_data.aggBy(average_agg) -average_by_character = daily_data.aggBy(average_agg, "Character") -average_by_boolean = daily_data.aggBy(average_agg, "Boolean") -average_by_even_odd = daily_data.update("IsEven = (Number % 2 == 0)").aggBy(average_agg, "IsEven") +total_average = daily_data.agg_by(average_agg, []) +average_by_character = daily_data.agg_by(average_agg, ["Character"]) +average_by_boolean = daily_data.agg_by(average_agg, ["Boolean"]) +average_by_even_odd = daily_data.update(formulas=["IsEven = (Number % 2 == 0)"]).agg_by(average_agg, ["IsEven"]) ``` We can also compute counts on the data. ```python -boolean_count_agg = as_list([ - agg.AggCount("BooleanCount") -]) -boolean_counts = daily_data.aggBy(boolean_count_agg, "Boolean") - -character_count_agg = as_list([ - agg.AggCount("CharacterCount") -]) -character_counts = daily_data.aggBy(character_count_agg, "Character") - -combo_count_avg_agg = as_list([ - agg.AggCount("ComboCount"), - agg.AggAvg("AvgNumber = Number") -]) -combo_counts_avg = daily_data.aggBy(combo_count_avg_agg, "Boolean", "Character") +boolean_count_agg = [ + agg.count_("BooleanCount") +] +boolean_counts = daily_data.agg_by(boolean_count_agg, ["Boolean"]) + +character_count_agg = [ + agg.count_("CharacterCount") +] +character_counts = daily_data.agg_by(character_count_agg, ["Character"]) + +combo_count_avg_agg = [ + agg.count_("ComboCount"), + agg.avg(["AvgNumber = Number"]) +] +combo_counts_avg = daily_data.agg_by(combo_count_avg_agg, ["Boolean", "Character"]) ``` Just like the grouped data, these aggregations update in real time. You don't need to recompute any of the averages or counts, Deephaven natively supports these real-time updates. diff --git a/demo/web/src/main/notebooks/A. 
Real-time table ops/A5 If-thens and conditions.md b/demo/web/src/main/notebooks/A. Real-time table ops/A5 If-thens and conditions.md index 7584b8b47f5..84e1254e2f9 100644 --- a/demo/web/src/main/notebooks/A. Real-time table ops/A5 If-thens and conditions.md +++ b/demo/web/src/main/notebooks/A. Real-time table ops/A5 If-thens and conditions.md @@ -5,13 +5,13 @@ In this notebook, we show how to use if-then statements and conditionals in Deep Let's start again by simulating some real-time data. ```python -from deephaven.DateTimeUtils import currentTime, expressionToNanos, minus +from deephaven2.time import now, to_nanos, minus -time_interval = expressionToNanos("1D") -offset = expressionToNanos("1000D") -now = currentTime() +time_interval = to_nanos("24:00:00") +offset = to_nanos("24000:00:00") +now_ = now() -daily_data = create_random_table(time_interval, start_time=minus(now, offset)) +daily_data = create_random_table(time_interval, start_time=minus(now_, offset)) ``` ## Ternaries @@ -19,7 +19,7 @@ daily_data = create_random_table(time_interval, start_time=minus(now, offset)) Deephaven supports [Java ternaries](https://deephaven.io/core/docs/reference/query-language/control-flow/ternary-if/). We can use these to perform if-else statements on our tables to assign values to a new column based on the if-else evaluation. ```python -daily_data = daily_data.update("IsEven = Number%2 == 0 ? true : false") +daily_data = daily_data.update(formulas=["IsEven = Number%2 == 0 ? true : false"]) ``` This query creates a new column `IsEven` in the `daily_data` table. @@ -34,6 +34,6 @@ Let's use the `is_weekday` method in a ternary statement to create a new column def is_weekday(day_of_week): return day_of_week <= 5 -daily_data = daily_data.update("DayOfWeekInt = dayOfWeek(Timestamp, TZ_NY)")\ - .update("IsWeekday = (Boolean)is_weekday(DayOfWeekInt) ? 
true: false") +daily_data = daily_data.update(formulas=["DayOfWeekInt = dayOfWeek(Timestamp, TZ_NY)"])\ + .update(formulas=["IsWeekday = (Boolean)is_weekday(DayOfWeekInt) ? true: false"]) ``` diff --git a/demo/web/src/main/notebooks/A. Real-time table ops/A6 Time methods.md b/demo/web/src/main/notebooks/A. Real-time table ops/A6 Time methods.md index cfc4804b08c..03f6e057d13 100644 --- a/demo/web/src/main/notebooks/A. Real-time table ops/A6 Time methods.md +++ b/demo/web/src/main/notebooks/A. Real-time table ops/A6 Time methods.md @@ -5,13 +5,13 @@ In this notebook, we show how to use Deephaven's time methods on time series dat Let's start again by simulating some real-time data, but this time with a shorter time interval. ```python -from deephaven.DateTimeUtils import currentTime, expressionToNanos, minus +from deephaven2.time import now, to_nanos, minus -time_interval = expressionToNanos("T1S") -offset = expressionToNanos("T1000S") -now = currentTime() +time_interval = to_nanos("00:00:01") +offset = to_nanos("00:16:40") +now_ = now() -daily_data = create_random_table(time_interval, start_time=minus(now, offset)) +daily_data = create_random_table(time_interval, start_time=minus(now_, offset)) ``` ## Datetime arithmethic @@ -21,11 +21,11 @@ Deephaven supports DateTime arithmetic through methods such as [plus](https://de This example shows how to subtract 2 hours from a timestamp: ```python -from deephaven.DateTimeUtils import convertPeriod +from deephaven2.time import to_period -two_hours = convertPeriod("T2H") +two_hours = to_period("T2H") -daily_data = daily_data.update("TimestampTwoHoursBefore = minus(Timestamp, two_hours)") +daily_data = daily_data.update(formulas=["TimestampTwoHoursBefore = minus(Timestamp, two_hours)"]) ``` ## Downsampling @@ -37,18 +37,18 @@ With time series data, binning methods like [lowerBin](https://deephaven.io/core This example shows how to group timestamps by the minute, and then store the sum of the `Number` column for each minute. 
```python -from deephaven.DateTimeUtils import expressionToNanos -from deephaven import Aggregation as agg, as_list +from deephaven2.time import to_nanos +from deephaven2 import agg -agg_list = as_list([ - agg.AggSum("Number") -]) +agg_list = [ + agg.sum_(["Number"]) +] -nanos_bin = expressionToNanos("T1M") +nanos_bin = to_nanos("00:01:00") -daily_data_binned = daily_data.update("TimestampMinute = lowerBin(Timestamp, nanos_bin)")\ - .dropColumns("Timestamp")\ - .aggBy(agg_list, "TimestampMinute") +daily_data_binned = daily_data.update(formulas=["TimestampMinute = lowerBin(Timestamp, nanos_bin)"])\ + .drop_columns(["Timestamp"])\ + .agg_by(agg_list, ["TimestampMinute"]) ``` ## as-of joins @@ -58,11 +58,11 @@ daily_data_binned = daily_data.update("TimestampMinute = lowerBin(Timestamp, nan This example shows how to join two time-series tables that have different densities in timestamps. ```python -time_interval = expressionToNanos("T10S") -offset = expressionToNanos("T1000S") -now = currentTime() +time_interval = to_nanos("00:00:10") +offset = to_nanos("00:16:40") +now = now() daily_data_two = create_random_table(time_interval, start_time=minus(now, offset)) -joined_daily_data = daily_data.aj(daily_data_two, "Timestamp", "NumberTwo = Number, CharacterTwo = Character, BooleanTwo = Boolean") +joined_daily_data = daily_data.aj(table=daily_data_two, on=["Timestamp"], joins=["NumberTwo = Number", "CharacterTwo = Character", "BooleanTwo = Boolean"]) ``` From f4073c049b97a43029874c738b5be9e61aaa70bd Mon Sep 17 00:00:00 2001 From: Jake Mulford Date: Wed, 30 Mar 2022 13:05:27 -0400 Subject: [PATCH 2/8] base demo notebooks v2 update --- .../main/notebooks/00 The Deephaven IDE.md | 15 +++-- demo/web/src/main/notebooks/07 Data Replay.md | 56 ++++++++++--------- .../src/main/notebooks/08 Table Listeners.md | 14 ++--- 3 files changed, 45 insertions(+), 40 deletions(-) diff --git a/demo/web/src/main/notebooks/00 The Deephaven IDE.md b/demo/web/src/main/notebooks/00 The Deephaven 
IDE.md index f10114de8e5..498a7305fda 100644 --- a/demo/web/src/main/notebooks/00 The Deephaven IDE.md +++ b/demo/web/src/main/notebooks/00 The Deephaven IDE.md @@ -31,10 +31,12 @@ print(type(x)) \ It is also used for interacting with both static tables..... ```python -from deephaven.TableTools import newTable, stringCol, intCol -static_table = newTable( - stringCol("Name_String_Col", "Data String 1", 'Data String 2', "Data String 3"), - intCol("Name_Int_Col", 44, 55, 66) +from deephaven2 import new_table +from deephaven2.column import string_col, int_col + +static_table = new_table([ + string_col("Name_String_Col", ["Data String 1", 'Data String 2', "Data String 3"]), + int_col("Name_Int_Col", [44, 55, 66]) ) ``` \ @@ -42,9 +44,10 @@ static_table = newTable( ... and dynamically updating ones. ```python -from deephaven.TableTools import timeTable +from deephaven2 import time_table + import random -updating_table = timeTable('00:00:00.400').updateView("Row = i", "Some_Int = (int)random.randint(0,100)").reverse() +updating_table = time_table('00:00:00.400').updateView("Row = i", "Some_Int = (int)(byte)random.randint(0,100)").reverse() ``` diff --git a/demo/web/src/main/notebooks/07 Data Replay.md b/demo/web/src/main/notebooks/07 Data Replay.md index ae82581557f..f560f33d9bd 100644 --- a/demo/web/src/main/notebooks/07 Data Replay.md +++ b/demo/web/src/main/notebooks/07 Data Replay.md @@ -7,27 +7,29 @@ Here, we show how to take historical data and play it back as real-time data bas To start, let's make a sample table containing random numbers generated at certain historical timestamps. 
```python -from deephaven import DynamicTableWriter -from deephaven.DateTimeUtils import Period, convertDateTime, plus -import deephaven.Types as dht +from deephaven2 import DynamicTableWriter +from deephaven2.time import to_period, to_datetime, plus_period +from deephaven2.dtypes import DateTime, int_ import random table_writer = DynamicTableWriter( - ["DateTime", "Number"], - [dht.datetime, dht.int_] + { + "DateTime": DateTime, + "Number": int_, + } ) -time = convertDateTime("2000-01-01T00:00:00 NY") -time_offset = Period("T1S") +time = to_datetime("2000-01-01T00:00:00 NY") +time_offset = to_period("T1S") -result = table_writer.getTable() +result = table_writer.table for i in range(100): random_number = random.randint(1, 100) - table_writer.logRow(time, random_number) - time = plus(time, time_offset) + table_writer.write_row(time, random_number) + time = plus_period(time, time_offset) ``` After running this code, we can see that the `result` table contains 100 entries of random numbers with each number having a historical timestamp. @@ -35,15 +37,15 @@ After running this code, we can see that the `result` table contains 100 entries So how do we replay this data? Using the [`Replayer`](https://deephaven.io/core/docs/reference/table-operations/create/Replayer/) object, we can specify a start and end time, and apply this to our table. 
```python -from deephaven.TableManipulation import Replayer +from deephaven2.replay import TableReplayer -start_time = convertDateTime("2000-01-01T00:00:00 NY") -end_time = convertDateTime("2000-01-01T00:01:40 NY") +start_time = to_datetime("2000-01-01T00:00:00 NY") +end_time = to_datetime("2000-01-01T00:01:40 NY") -result_replayer = Replayer(start_time, end_time) +replayer = TableReplayer(start_time, end_time) +replayed_table = replayer.add_table(result, "DateTime") -replayed_result = result_replayer.replay(result, "DateTime") -result_replayer.start() +replayer.start() ``` After running this code, the `replayed_result` table begins updating in "real-time" with our historical data. Since each of our timestamps are one second apart, the table updates with a new row every second. This gives us an exact replication of how our initial table would have been populated in real-time. @@ -51,22 +53,22 @@ After running this code, the `replayed_result` table begins updating in "real-ti Deephaven table operations do not discriminate between dynamic or static data; we can apply the same table operations to this table as we would any table. 
``` -from deephaven.TableManipulation import Replayer -from deephaven import Aggregation as agg, as_list +from deephaven2.replay import TableReplayer +from deephaven2 import agg -start_time = convertDateTime("2000-01-01T00:00:00 NY") -end_time = convertDateTime("2000-01-01T00:01:40 NY") +start_time = to_datetime("2000-01-01T00:00:00 NY") +end_time = to_datetime("2000-01-01T00:01:40 NY") -result_replayer = Replayer(start_time, end_time) +replayer = TableReplayer(start_time, end_time) +replayed_table = replayer.add_table(result, "DateTime") -replayed_result = result_replayer.replay(result, "DateTime") -result_replayer.start() +replayer.start() -agg_list = as_list([ - agg.AggAvg("Number") -]) +agg_list = [ + agg.avg(["Number"]) +] -replayed_average = replayed_result.aggBy(agg_list) +replayed_average = replayed_table.agg_by(agg_list, []) ``` With this example, we can re-run our replay and see our average value updating in real-time. diff --git a/demo/web/src/main/notebooks/08 Table Listeners.md b/demo/web/src/main/notebooks/08 Table Listeners.md index 5e83518dbf8..543f82b90da 100644 --- a/demo/web/src/main/notebooks/08 Table Listeners.md +++ b/demo/web/src/main/notebooks/08 Table Listeners.md @@ -5,11 +5,11 @@ One of Deephaven's defining features is its ability to handle real-time data wit To start, let's make a table that updates every two seconds with a random number. ```python -from deephaven.TableTools import timeTable +from deephaven2 import time_table import random -table = timeTable('00:00:02').update("Number = (int)random.randint(1,100)") +table = time_table('00:00:02').update(["Number = (int)(byte)random.randint(1,100)"]) ``` As you can see, this table updates with a new row every two seconds. @@ -17,7 +17,7 @@ As you can see, this table updates with a new row every two seconds. Now let's add a listener. This listener will simply log every update to the table. 
```python -from deephaven import listen +from deephaven2.table_listener import listen def log_table_update(update): print(f"FUNCTION LISTENER: update={update}") @@ -35,7 +35,7 @@ def log_table_update_with_row(update): while added_iterator.hasNext(): added_index = added_iterator.nextLong() - added_number = table.getColumnSource("Number").get(added_index) + added_number = table.j_object.getColumnSource("Number").get(added_index) print(f"Added number: {added_number}") log_table_update_with_row_handler = listen(table, log_table_update_with_row) @@ -50,8 +50,8 @@ What if we had two time tables and wanted to reuse the same listener method? We Let's start with our two time tables. ```python -table_one = timeTable('00:00:02').update("Number = (int)random.randint(1,100)") -table_two = timeTable('00:00:05').update("Number = (int)random.randint(1,100)") +table_one = time_table('00:00:02').update(["Number = (int)(byte)random.randint(1,100)"]) +table_two = time_table('00:00:05').update(["Number = (int)(byte)random.randint(1,100)"]) ``` Now let's build our function. 
@@ -65,7 +65,7 @@ def log_table_builder(table): while added_iterator.hasNext(): added_index = added_iterator.nextLong() - added_number = table.getColumnSource("Number").get(added_index) + added_number = table.j_object.getColumnSource("Number").get(added_index) print(f"Added number: {added_number}") return log_table_update_with_row From e779f6502ae471cf886e557712c1bd9c59a93f45 Mon Sep 17 00:00:00 2001 From: margaretkennedy <82049573+margaretkennedy@users.noreply.github.com> Date: Wed, 30 Mar 2022 13:29:24 -0400 Subject: [PATCH 3/8] Update 00 The Deephaven IDE.md --- demo/web/src/main/notebooks/00 The Deephaven IDE.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/demo/web/src/main/notebooks/00 The Deephaven IDE.md b/demo/web/src/main/notebooks/00 The Deephaven IDE.md index 498a7305fda..237b1d84897 100644 --- a/demo/web/src/main/notebooks/00 The Deephaven IDE.md +++ b/demo/web/src/main/notebooks/00 The Deephaven IDE.md @@ -29,7 +29,8 @@ print(type(x)) ``` \ \ -It is also used for interacting with both static tables..... +It is also used for interacting with both static tables... + ```python from deephaven2 import new_table from deephaven2.column import string_col, int_col @@ -52,6 +53,7 @@ updating_table = time_table('00:00:00.400').updateView("Row = i", "Some_Int = (i ## These notebooks demonstrate Deephaven differentiators and workflows + (You can find the notebooks also listed at top-right under "File Explorer".) @@ -60,7 +62,7 @@ updating_table = time_table('00:00:00.400').updateView("Row = i", "Some_Int = (i 3. [Kafka Stream vs Append](03%20Kafka%20Stream%20vs%20Append.md) -Go to [quick start](https://deephaven.io/core/docs/tutorials/quickstart/) to install Deephaven from our pre-built images. +Go to our [Quick start](https://deephaven.io/core/docs/tutorials/quickstart/) to install Deephaven from our pre-built images. 
Or simply [open the first notebook.](01%20Tables,%20Updates,%20and%20the%20Engine.md) From 25c9aedf807d0d6440dfb2c68bfb23dcb42e9013 Mon Sep 17 00:00:00 2001 From: margaretkennedy <82049573+margaretkennedy@users.noreply.github.com> Date: Wed, 30 Mar 2022 13:29:53 -0400 Subject: [PATCH 4/8] Update 07 Data Replay.md --- demo/web/src/main/notebooks/07 Data Replay.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/web/src/main/notebooks/07 Data Replay.md b/demo/web/src/main/notebooks/07 Data Replay.md index f560f33d9bd..a9f56cbf328 100644 --- a/demo/web/src/main/notebooks/07 Data Replay.md +++ b/demo/web/src/main/notebooks/07 Data Replay.md @@ -34,7 +34,7 @@ for i in range(100): After running this code, we can see that the `result` table contains 100 entries of random numbers with each number having a historical timestamp. -So how do we replay this data? Using the [`Replayer`](https://deephaven.io/core/docs/reference/table-operations/create/Replayer/) object, we can specify a start and end time, and apply this to our table. +So how do we replay this data? Using the [`replayer`](https://deephaven.io/core/docs/reference/table-operations/create/Replayer/) object, we can specify a start and end time, and apply this to our table. ```python from deephaven2.replay import TableReplayer From 74da88b05ebf4e940e105916cfd3cc4fe4a70be8 Mon Sep 17 00:00:00 2001 From: margaretkennedy <82049573+margaretkennedy@users.noreply.github.com> Date: Wed, 30 Mar 2022 13:31:06 -0400 Subject: [PATCH 5/8] Update demo/web/src/main/notebooks/A. Real-time table ops/A3 Do time series and relational joins.md --- .../A3 Do time series and relational joins.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/web/src/main/notebooks/A. Real-time table ops/A3 Do time series and relational joins.md b/demo/web/src/main/notebooks/A. Real-time table ops/A3 Do time series and relational joins.md index 8b139ad1adc..677c6dcd05c 100644 --- a/demo/web/src/main/notebooks/A. 
Real-time table ops/A3 Do time series and relational joins.md +++ b/demo/web/src/main/notebooks/A. Real-time table ops/A3 Do time series and relational joins.md @@ -26,7 +26,7 @@ Let's join these tables using an `aj` to get a single table with all of our info joined_data_aj = daily_data_0.aj(table=daily_data_1, on=["Timestamp"], joins=["Number1 = Number", "Character1 = Character", "Boolean1 = Boolean"]) ``` -Deephaven supports another type of as of join, an `raj`. For a `raj`, the values in the right table are matched to the closest values in the left table without going under the left value. For example, if the right table contains a value `5` and the left table contains values `4` and `6`, the right table's `5` will be matched on the left table's `4`. +Deephaven supports another type of as-of join, a `raj`. For a `raj`, the values in the right table are matched to the closest values in the left table without going under the left value. For example, if the right table contains a value `5` and the left table contains values `4` and `6`, the right table's `5` will be matched on the left table's `4`. Let's also join these tables using a `raj`. From 70ff452b0ee738726b7a89befddaf96ff37bd7a6 Mon Sep 17 00:00:00 2001 From: Jake Mulford Date: Mon, 4 Apr 2022 09:57:48 -0400 Subject: [PATCH 6/8] deephaven2 to deephaven fix and a few missed syntax errors --- demo/web/src/main/notebooks/00 The Deephaven IDE.md | 10 +++++----- demo/web/src/main/notebooks/07 Data Replay.md | 12 ++++++------ demo/web/src/main/notebooks/08 Table Listeners.md | 4 ++-- .../A1 Create tables and fake data .md | 2 +- .../A2 Filter and decorate .md | 6 +++--- .../A3 Do time series and relational joins.md | 2 +- .../A4 Group and aggregate .md | 4 ++-- .../A5 If-thens and conditions.md | 2 +- .../A. 
Real-time table ops/A6 Time methods.md | 8 ++++---- 9 files changed, 25 insertions(+), 25 deletions(-) diff --git a/demo/web/src/main/notebooks/00 The Deephaven IDE.md b/demo/web/src/main/notebooks/00 The Deephaven IDE.md index 237b1d84897..8fc334d0fc0 100644 --- a/demo/web/src/main/notebooks/00 The Deephaven IDE.md +++ b/demo/web/src/main/notebooks/00 The Deephaven IDE.md @@ -32,23 +32,23 @@ print(type(x)) It is also used for interacting with both static tables... ```python -from deephaven2 import new_table -from deephaven2.column import string_col, int_col +from deephaven import new_table +from deephaven.column import string_col, int_col static_table = new_table([ string_col("Name_String_Col", ["Data String 1", 'Data String 2', "Data String 3"]), int_col("Name_Int_Col", [44, 55, 66]) -) +]) ``` \ \ ... and dynamically updating ones. ```python -from deephaven2 import time_table +from deephaven import time_table import random -updating_table = time_table('00:00:00.400').updateView("Row = i", "Some_Int = (int)(byte)random.randint(0,100)").reverse() +updating_table = time_table('00:00:00.400').update_view(["Row = i", "Some_Int = (int)(byte)random.randint(0,100)"]).reverse() ``` diff --git a/demo/web/src/main/notebooks/07 Data Replay.md b/demo/web/src/main/notebooks/07 Data Replay.md index a9f56cbf328..d30dde02bfc 100644 --- a/demo/web/src/main/notebooks/07 Data Replay.md +++ b/demo/web/src/main/notebooks/07 Data Replay.md @@ -7,9 +7,9 @@ Here, we show how to take historical data and play it back as real-time data bas To start, let's make a sample table containing random numbers generated at certain historical timestamps. 
```python -from deephaven2 import DynamicTableWriter -from deephaven2.time import to_period, to_datetime, plus_period -from deephaven2.dtypes import DateTime, int_ +from deephaven import DynamicTableWriter +from deephaven.time import to_period, to_datetime, plus_period +from deephaven.dtypes import DateTime, int_ import random @@ -37,7 +37,7 @@ After running this code, we can see that the `result` table contains 100 entries So how do we replay this data? Using the [`replayer`](https://deephaven.io/core/docs/reference/table-operations/create/Replayer/) object, we can specify a start and end time, and apply this to our table. ```python -from deephaven2.replay import TableReplayer +from deephaven.replay import TableReplayer start_time = to_datetime("2000-01-01T00:00:00 NY") end_time = to_datetime("2000-01-01T00:01:40 NY") @@ -53,8 +53,8 @@ After running this code, the `replayed_result` table begins updating in "real-ti Deephaven table operations do not discriminate between dynamic or static data; we can apply the same table operations to this table as we would any table. ``` -from deephaven2.replay import TableReplayer -from deephaven2 import agg +from deephaven.replay import TableReplayer +from deephaven import agg start_time = to_datetime("2000-01-01T00:00:00 NY") end_time = to_datetime("2000-01-01T00:01:40 NY") diff --git a/demo/web/src/main/notebooks/08 Table Listeners.md b/demo/web/src/main/notebooks/08 Table Listeners.md index 543f82b90da..95f8cccc4bb 100644 --- a/demo/web/src/main/notebooks/08 Table Listeners.md +++ b/demo/web/src/main/notebooks/08 Table Listeners.md @@ -5,7 +5,7 @@ One of Deephaven's defining features is its ability to handle real-time data wit To start, let's make a table that updates every two seconds with a random number. ```python -from deephaven2 import time_table +from deephaven import time_table import random @@ -17,7 +17,7 @@ As you can see, this table updates with a new row every two seconds. Now let's add a listener. 
This listener will simply log every update to the table. ```python -from deephaven2.table_listener import listen +from deephaven.table_listener import listen def log_table_update(update): print(f"FUNCTION LISTENER: update={update}") diff --git a/demo/web/src/main/notebooks/A. Real-time table ops/A1 Create tables and fake data .md b/demo/web/src/main/notebooks/A. Real-time table ops/A1 Create tables and fake data .md index e19c6a221f3..684c67eecf4 100644 --- a/demo/web/src/main/notebooks/A. Real-time table ops/A1 Create tables and fake data .md +++ b/demo/web/src/main/notebooks/A. Real-time table ops/A1 Create tables and fake data .md @@ -5,7 +5,7 @@ Throughout this demo notebook series, we show many of the ways to interact with `time_table` is a great tool to simulate real-time data. We can use this and Python's `random` library to generate some fake data. ```python -from deephaven2 import time_table +from deephaven import time_table import random import string diff --git a/demo/web/src/main/notebooks/A. Real-time table ops/A2 Filter and decorate .md b/demo/web/src/main/notebooks/A. Real-time table ops/A2 Filter and decorate .md index 0362b428c0c..e06e328a31c 100644 --- a/demo/web/src/main/notebooks/A. Real-time table ops/A2 Filter and decorate .md +++ b/demo/web/src/main/notebooks/A. Real-time table ops/A2 Filter and decorate .md @@ -5,7 +5,7 @@ In this notebook, we show how to decorate and filter our data. Let's start by simulating measurements of our values every minute. This could represent something like stock prices, temperatures, etc. ```python -from deephaven2.time import now, to_nanos, minus +from deephaven.time import now, to_nanos, minus time_interval = to_nanos("00:01:00") offset = to_nanos("240:00:00") @@ -47,8 +47,8 @@ odds_or_falses = daily_data.where_one_of(filters=["!(Number % 2 == 0)", "!Boolea Some filtering methods can apply a filter to one table based on another. 
```python -from deephaven2 import new_table -from deephaven2.column import string_col +from deephaven import new_table +from deephaven.column import string_col vowels_table = new_table([ diff --git a/demo/web/src/main/notebooks/A. Real-time table ops/A3 Do time series and relational joins.md b/demo/web/src/main/notebooks/A. Real-time table ops/A3 Do time series and relational joins.md index 677c6dcd05c..cf7649d0214 100644 --- a/demo/web/src/main/notebooks/A. Real-time table ops/A3 Do time series and relational joins.md +++ b/demo/web/src/main/notebooks/A. Real-time table ops/A3 Do time series and relational joins.md @@ -5,7 +5,7 @@ In this notebook, we show how to perform joins with our time series data. Let's start again by simulating measurements of our values every minute, but this time using two tables with slightly different timestamps. This is great for a simulation because there's no guarantee that a real example will collect data with exact timestamp matches. ```python -from deephaven2.time import now, to_nanos, minus +from deephaven.time import now, to_nanos, minus time_interval = to_nanos("00:01:00") offset_0 = to_nanos("240:00:02") diff --git a/demo/web/src/main/notebooks/A. Real-time table ops/A4 Group and aggregate .md b/demo/web/src/main/notebooks/A. Real-time table ops/A4 Group and aggregate .md index 028384360d1..b1f35119be0 100644 --- a/demo/web/src/main/notebooks/A. Real-time table ops/A4 Group and aggregate .md +++ b/demo/web/src/main/notebooks/A. Real-time table ops/A4 Group and aggregate .md @@ -5,7 +5,7 @@ In this notebook, we show how to perform groupings and aggregations with our tim Let's start again by simulating some real-time data. Here, we'll use a shorter time interval. 
```python -from deephaven2.time import now, to_nanos, minus +from deephaven.time import now, to_nanos, minus time_interval = to_nanos("00:00:01") offset = to_nanos("00:01:00") @@ -27,7 +27,7 @@ Just like all other real-time operations in Deephaven, these groupings update in Aggregations allow you to perform operations on grouped data. The query below computes statistics on our data. ```python -from deephaven2 import agg +from deephaven import agg average_agg = [ agg.avg(["Number"]) diff --git a/demo/web/src/main/notebooks/A. Real-time table ops/A5 If-thens and conditions.md b/demo/web/src/main/notebooks/A. Real-time table ops/A5 If-thens and conditions.md index 84e1254e2f9..6245761fdec 100644 --- a/demo/web/src/main/notebooks/A. Real-time table ops/A5 If-thens and conditions.md +++ b/demo/web/src/main/notebooks/A. Real-time table ops/A5 If-thens and conditions.md @@ -5,7 +5,7 @@ In this notebook, we show how to use if-then statements and conditionals in Deep Let's start again by simulating some real-time data. ```python -from deephaven2.time import now, to_nanos, minus +from deephaven.time import now, to_nanos, minus time_interval = to_nanos("24:00:00") offset = to_nanos("24000:00:00") diff --git a/demo/web/src/main/notebooks/A. Real-time table ops/A6 Time methods.md b/demo/web/src/main/notebooks/A. Real-time table ops/A6 Time methods.md index 03f6e057d13..362774d111c 100644 --- a/demo/web/src/main/notebooks/A. Real-time table ops/A6 Time methods.md +++ b/demo/web/src/main/notebooks/A. Real-time table ops/A6 Time methods.md @@ -5,7 +5,7 @@ In this notebook, we show how to use Deephaven's time methods on time series dat Let's start again by simulating some real-time data, but this time with a shorter time interval. 
```python -from deephaven2.time import now, to_nanos, minus +from deephaven.time import now, to_nanos, minus time_interval = to_nanos("00:00:01") offset = to_nanos("00:16:40") @@ -21,7 +21,7 @@ Deephaven supports DateTime arithmetic through methods such as [plus](https://de This example shows how to subtract 2 hours from a timestamp: ```python -from deephaven2.time import to_period +from deephaven.time import to_period two_hours = to_period("T2H") @@ -37,8 +37,8 @@ With time series data, binning methods like [lowerBin](https://deephaven.io/core This example shows how to group timestamps by the minute, and then store the sum of the `Number` column for each minute. ```python -from deephaven2.time import to_nanos -from deephaven2 import agg +from deephaven.time import to_nanos +from deephaven import agg agg_list = [ agg.sum_(["Number"]) From 3dcbcd964223750726da918df5e12f2f5e3cefcf Mon Sep 17 00:00:00 2001 From: Jake Mulford Date: Mon, 4 Apr 2022 10:35:59 -0400 Subject: [PATCH 7/8] Update demo/web/src/main/notebooks/00 The Deephaven IDE.md Co-authored-by: JJ Brosnan <84038776+jjbrosnan@users.noreply.github.com> --- demo/web/src/main/notebooks/00 The Deephaven IDE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/web/src/main/notebooks/00 The Deephaven IDE.md b/demo/web/src/main/notebooks/00 The Deephaven IDE.md index 8fc334d0fc0..2d7f76349bd 100644 --- a/demo/web/src/main/notebooks/00 The Deephaven IDE.md +++ b/demo/web/src/main/notebooks/00 The Deephaven IDE.md @@ -62,7 +62,7 @@ updating_table = time_table('00:00:00.400').update_view(["Row = i", "Some_Int = 3. [Kafka Stream vs Append](03%20Kafka%20Stream%20vs%20Append.md) -Go to our [Quick start](https://deephaven.io/core/docs/tutorials/quickstart/) to install Deephaven from our pre-built images. +Go to our [Quick start](https://deephaven.io/core/docs/tutorials/quickstart/) guide to install Deephaven from our pre-built images. 
Or simply [open the first notebook.](01%20Tables,%20Updates,%20and%20the%20Engine.md) From 1ee257b11bd6eef8c3f24066cec6db7c064b80c4 Mon Sep 17 00:00:00 2001 From: Amanda L Martin Date: Mon, 4 Apr 2022 14:30:17 -0400 Subject: [PATCH 8/8] kafka changes --- .../01 Tables, Updates, and the Engine.md | 78 +++++++++---------- .../notebooks/03 Kafka Stream vs Append.md | 61 ++++++++------- 2 files changed, 69 insertions(+), 70 deletions(-) diff --git a/demo/web/src/main/notebooks/01 Tables, Updates, and the Engine.md b/demo/web/src/main/notebooks/01 Tables, Updates, and the Engine.md index 7c0e0f2466e..571234c8566 100644 --- a/demo/web/src/main/notebooks/01 Tables, Updates, and the Engine.md +++ b/demo/web/src/main/notebooks/01 Tables, Updates, and the Engine.md @@ -9,17 +9,19 @@ You can quickly see streaming data in a UI and do table operations, interactivel For example, you can listen to a Kafka stream of cryptocurrency trades sourced from their native exchanges (like the ones below, built using the [XChange library](https://github.com/knowm/XChange)). ```python -from deephaven import ConsumeKafka as ck +from deephaven import kafka_consumer as ck +from deephaven.stream.kafka.consumer import TableType, KeyValueSpec + def get_trades_stream(): - return ck.consumeToTable( - { 'bootstrap.servers' : 'demo-kafka.c.deephaven-oss.internal:9092', + return ck.consume( + { 'bootstrap.servers' : 'demo-kafka.c.deephaven-oss.internal:9092', 'schema.registry.url' : 'http://demo-kafka.c.deephaven-oss.internal:8081' }, 'io.deephaven.crypto.kafka.TradesTopic', - key = ck.IGNORE, - value = ck.avro('io.deephaven.crypto.kafka.TradesTopic-io.deephaven.crypto.Trade'), + key_spec=KeyValueSpec.IGNORE, + value_spec = ck.avro_spec('io.deephaven.crypto.kafka.TradesTopic-io.deephaven.crypto.Trade'), offsets=ck.ALL_PARTITIONS_SEEK_TO_END, - table_type='append') + table_type=TableType.Append) trades_stream = get_trades_stream() ``` @@ -40,7 +42,7 @@ Tables and streams are a single abstraction. 
Event streams, feeds, [soon] CDC, a You can readily see that your table grows as greater volumes of data are inherited from the Kafka feed. ```python -row_count = trades_stream.countBy("Tot_Rows") +row_count = trades_stream.count_by(col="Tot_Rows") ``` \ \ @@ -58,8 +60,8 @@ As you might expect, a named table can have multiple dependencies.\ After you run the following command, you'll see that all three of your tables are now updating in lock-step. ```python -row_count_by_instrument = trades_stream.countBy("Tot_Rows", "Instrument")\ - .sortDescending("Tot_Rows") +row_count_by_instrument = trades_stream.count_by(col="Tot_Rows", by = ["Instrument"])\ + .sort_descending(order_by = ["Tot_Rows"]) ``` \ \ @@ -69,10 +71,10 @@ Below is one way to use a replace() method to swap one for the other.\ To learn more about updateView (or other selection or projection alternatives), refer to [the docs](https://deephaven.io/core/docs/conceptual/choose-select-view-update/). ```python -trades_stream_cleaner = trades_stream.updateView("Instrument = Instrument.replace(`USDT`, `USD`)") +trades_stream_cleaner = trades_stream.update_view(formulas = ["Instrument = Instrument.replace(`USDT`, `USD`)"]) -row_count_by_instrument = trades_stream_cleaner.countBy("Tot_Rows", "Instrument")\ - .sortDescending("Tot_Rows") +row_count_by_instrument = trades_stream_cleaner.count_by(col="Tot_Rows", by = ["Instrument"])\ + .sort_descending(order_by = ["Tot_Rows"]) ``` \ \ @@ -80,20 +82,19 @@ row_count_by_instrument = trades_stream_cleaner.countBy("Tot_Rows", "Instrument" Counts are informative, but often you'll be interested in other aggregations. The script below shows both how to [bin data by time](https://deephaven.io/core/docs/reference/cheat-sheets/datetime-cheat-sheet/#downsampling-temporal-data-via-time-binning) and to [do multiple aggregations](https://deephaven.io/core/docs/how-to-guides/combined-aggregations/). 
```python -from deephaven import Aggregation as agg, as_list - -agg_list = as_list([ - agg.AggCount("Trade_Count"), - agg.AggSum("Total_Size = Size"), - agg.AggAvg("Avg_Size = Size", "Avg_Price = Price"), - agg.AggMin("Low_Price = Price"), - agg.AggMax("High_Price = Price") -]) - -multi_agg = trades_stream_cleaner.updateView("TimeBin = upperBin(KafkaTimestamp, MINUTE)")\ - .aggBy(agg_list, "TimeBin", "Instrument")\ - .sortDescending("TimeBin", "Trade_Count")\ - .formatColumnWhere("Instrument", "Instrument = `BTC/USD`", "CYAN") +from deephaven import agg as agg + +agg_list = [ + agg.count_(col = "Trade_Count"), + agg.sum_(cols = ["Total_Size = Size"]), + agg.avg(cols=["Avg_Size = Size", "Avg_Price = Price"]), + agg.min_(cols = ["Low_Price = Price"]), + agg.max_(cols = ["High_Price = Price"]) +] + +multi_agg = trades_stream_cleaner.update_view(formulas = ["TimeBin = upperBin(KafkaTimestamp, MINUTE)"])\ + .agg_by(agg_list, by = ["TimeBin", "Instrument"])\ + .sort_descending(order_by = ["TimeBin", "Trade_Count"]) ``` \ \ @@ -102,14 +103,13 @@ Filtering streams is straightforward. One simply uses `where()` to impose a huge ```python # Filter on a manually-set filter -multi_agg_btc = multi_agg.where("Instrument = `BTC/USD`") -multi_agg_eth = multi_agg.where("Instrument = `ETH/USD`") +multi_agg_btc = multi_agg.where(["Instrument = `BTC/USD`"]) +multi_agg_eth = multi_agg.where(["Instrument = `ETH/USD`"]) # Filter on a programatically set criteria top_instrument = multi_agg.head(1) -multi_agg_row_0 = multi_agg.whereIn(top_instrument, "Instrument")\ - .formatColumns("Total_Size = heatmap(Total_Size, 10, 300, MAGENTA, CYAN)") +multi_agg_row_0 = multi_agg.where_in(top_instrument, ["Instrument"]) ``` \ \ @@ -117,10 +117,9 @@ multi_agg_row_0 = multi_agg.whereIn(top_instrument, "Instrument")\ [Joining streams](https://deephaven.io/core/docs/how-to-guides/joins-overview/) is one of Deephaven's superpowers . 
Deephaven supports both high-performance joins that are (i) relational in nature ....... ```python -join_eth_btc = multi_agg_eth.view("TimeBin", "Eth_Avg_Price = Avg_Price")\ - .naturalJoin(multi_agg_btc, "TimeBin", "Btc_Avg_Price = Avg_Price")\ - .updateView("Ratio_Avg_Prices = Btc_Avg_Price / Eth_Avg_Price")\ - .formatColumns("Eth_Avg_Price = Decimal(`#,###.00`)", "Btc_Avg_Price = Decimal(`#,###.00`)") +join_eth_btc = multi_agg_eth.view(["TimeBin", "Eth_Avg_Price = Avg_Price"])\ + .natural_join(table=multi_agg_btc, on=["TimeBin"], joins=["Btc_Avg_Price = Avg_Price"])\ + .update_view(["Ratio_Avg_Prices = Btc_Avg_Price / Eth_Avg_Price"]) ``` \ \ @@ -132,13 +131,12 @@ join_eth_btc = multi_agg_eth.view("TimeBin", "Eth_Avg_Price = Avg_Price")\ # KafkaTimestamp in the right table (btc_trades). # If there is no exact nanosecond match, the record with KafkaTimestamp just preceding Eth_Time is used -eth_trades = trades_stream.where("Instrument = `ETH/USD`") -btc_trades = trades_stream.where("Instrument = `BTC/USD`") +eth_trades = trades_stream.where(["Instrument = `ETH/USD`"]) +btc_trades = trades_stream.where(["Instrument = `BTC/USD`"]) -time_series_join_eth_btc = eth_trades.view("Eth_Time = KafkaTimestamp", "Eth_Price = Price")\ - .aj(btc_trades, "Eth_Time = KafkaTimestamp", "Btc_Price = Price, Btc_Time = KafkaTimestamp")\ - .updateView("Ratio_Each_Trade = Btc_Price / Eth_Price")\ - .formatColumns("Eth_Price = Decimal(`#,###.00`)", "Btc_Price = Decimal(`#,###.00`)") +time_series_join_eth_btc = eth_trades.view(["Eth_Time = KafkaTimestamp", "Eth_Price = Price"])\ + .aj(btc_trades, on=["Eth_Time = KafkaTimestamp"],joins=["Btc_Price = Price, Btc_Time = KafkaTimestamp"])\ + .update_view(["Ratio_Each_Trade = Btc_Price / Eth_Price"]) ``` \ \ diff --git a/demo/web/src/main/notebooks/03 Kafka Stream vs Append.md b/demo/web/src/main/notebooks/03 Kafka Stream vs Append.md index 3afb23ef6c3..a5475949fc3 100644 --- a/demo/web/src/main/notebooks/03 Kafka Stream vs Append.md +++ 
b/demo/web/src/main/notebooks/03 Kafka Stream vs Append.md
@@ -17,8 +17,12 @@ Start by importing some requisite packages. There is documentation on [installin
 [aggBy](https://deephaven.io/core/docs/reference/table-operations/group-and-aggregate/aggBy/), [emptyTable](https://deephaven.io/core/docs/how-to-guides/empty-table/#related-documentation), and [merge](https://deephaven.io/core/docs/how-to-guides/merge-tables/#merge-tables).
 
 ```python
-from deephaven import ConsumeKafka as ck, Aggregation as agg, as_list
-from deephaven.TableTools import emptyTable, merge
+from deephaven import kafka_consumer as ck
+from deephaven.stream.kafka.consumer import TableType, KeyValueSpec
+from deephaven import empty_table, merge
+from deephaven import agg as agg
+
+
 ```
 
 \
@@ -31,13 +35,13 @@ The feed started on September 10th, 2021. A month later it had ~ 110 million eve
 This demo will demonstrate the impact of choices related to `offsets` and `table_type`.
 
 ```python
-def get_trades(*, offsets, table_type):
-    return ck.consumeToTable(
+def get_trades(*, offsets, table_type):
+    return ck.consume(
         { 'bootstrap.servers' : 'demo-kafka.c.deephaven-oss.internal:9092',
           'schema.registry.url' : 'http://demo-kafka.c.deephaven-oss.internal:8081' },
         'io.deephaven.crypto.kafka.TradesTopic',
-        key = ck.IGNORE,
-        value = ck.avro('io.deephaven.crypto.kafka.TradesTopic-io.deephaven.crypto.Trade'),
+        key_spec=KeyValueSpec.IGNORE,
+        value_spec = ck.avro_spec('io.deephaven.crypto.kafka.TradesTopic-io.deephaven.crypto.Trade'),
         offsets=offsets,
         table_type=table_type)
 ```
```python -latest_offset = get_trades(offsets=ck.ALL_PARTITIONS_SEEK_TO_END, table_type='stream') +latest_offset = get_trades(offsets=ck.ALL_PARTITIONS_SEEK_TO_END, table_type=TableType.Stream) ``` \ \ \ -You can do a simple [lastBy()](https://deephaven.io/core/docs/reference/table-operations/group-and-aggregate/lastBy/) to refine the view to only the single last record. +You can do a simple [last_by()](https://deephaven.io/core/docs/reference/table-operations/group-and-aggregate/lastBy/) to refine the view to only the single last record. ```python -latest_offset = latest_offset.view("KafkaOffset").lastBy() +latest_offset = latest_offset.view(["KafkaOffset"]).last_by() ``` \ @@ -69,7 +73,7 @@ Since you are targeting a Kafka offset of "1 mm events ago", create a static tab [Snapshotting](https://deephaven.io/core/docs/how-to-guides/reduce-update-frequency/#create-a-static-snapshot) to an empty table does the trick. ```python -latest_offset = emptyTable(0).snapshot(latest_offset, True) +latest_offset = empty_table(0).snapshot(source_table=latest_offset, do_init=True) ``` \ @@ -79,8 +83,8 @@ With the static table, you can now set a variable by pulling a record from the t ```python size_offset = 1_000_000 -target_offset_table = latest_offset.view("Offset=max(0, KafkaOffset-size_offset)") -offset_variable = target_offset_table.getColumn("Offset").get(0) +target_offset_table = latest_offset.view(["Offset=max(0, KafkaOffset-size_offset)"]) +offset_variable = target_offset_table.j_object.getColumnSource("Offset").get(0) ``` \ @@ -100,13 +104,12 @@ Define a [table aggregation function](https://deephaven.io/core/docs/reference/t ```python def trades_agg(table): - agg_list = as_list([ - agg.AggCount("Trade_Count"), - agg.AggSum("Total_Size = Size"), - ]) - return table.aggBy(agg_list, "Exchange", "Instrument").\ - sort("Exchange", "Instrument").\ - formatColumnWhere("Instrument", "Instrument.startsWith(`BTC`)", "IVORY") + agg_list = [ + agg.count_(col = "Trade_Count"), + 
agg.sum_(cols = ["Total_Size = Size"]), + ] + return table.agg_by(agg_list, by=["Exchange", "Instrument"]).\ + sort(order_by = ["Exchange", "Instrument"]) ``` \ @@ -120,9 +123,9 @@ Create a new appending table, starting at the `offset_variable` you defined abov You will see that the count is growing. It should reach > 1mm records quickly. ```python -trades_append = get_trades(offsets={ 0: offset_variable }, table_type='append') +trades_append = get_trades(offsets={ 0: offset_variable }, table_type=TableType.Append) agg_append = trades_agg(trades_append) -row_count_append = trades_append.countBy("RowCount").updateView("Table_Type = `append`") +row_count_append = trades_append.count_by(col="RowCount").update_view(["Table_Type = `append`"]) ``` \ @@ -134,13 +137,11 @@ Touch the table tab called `agg_append` to see the Trade\_Count and Total\_Size \ For comparison, repeat the exercise, changing only the `table_type` parameter of the Kafka integration to be _**stream**_ (instead of _**append**_). -Note the `dropStream()` syntax. ```python -trades_stream = get_trades(offsets={ 0: offset_variable }, table_type='stream') +trades_stream = get_trades(offsets={ 0: offset_variable }, table_type=TableType.Stream) agg_stream = trades_agg(trades_stream) -row_count_stream = trades_stream.dropStream()\ -.countBy("RowCount").updateView("Table_Type = `stream`") +row_count_stream = trades_stream.count_by(col="RowCount").update_view(["Table_Type = `stream`"]) ``` \ @@ -158,7 +159,7 @@ However, stream tables have the ingested records in memory only for a split seco Merging the two row_count tables makes the difference obvious. 
```python
-row_count_merge = merge(row_count_append, row_count_stream)
+row_count_merge = merge([row_count_append, row_count_stream])
 ```
 
 \
 \
 \
 You can confirm the two aggregation tables are identical by [joining them](https://deephaven.io/core/docs/reference/table-operations/join/natural-join/) and taking the difference.
 
 ```python
-diff_table = agg_append.naturalJoin(agg_stream, "Exchange, Instrument",\
-    "Trade_Count_Agg = Trade_Count, Total_Size_Agg = Total_Size")\
-    .view("Exchange", "Instrument", "Diff_Trade_Count = Trade_Count - Trade_Count_Agg",
-        "Diff_Total_Size = Total_Size - Total_Size_Agg")
+diff_table = agg_append.natural_join(table=agg_stream, on=["Exchange", "Instrument"],\
+    joins=["Trade_Count_Agg = Trade_Count", "Total_Size_Agg = Total_Size"])\
+    .view(["Exchange", "Instrument", "Diff_Trade_Count = Trade_Count - Trade_Count_Agg",
+        "Diff_Total_Size = Total_Size - Total_Size_Agg"])
 ```
 
 \