Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Document arrow datatypes #7986

Merged
merged 8 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6242,6 +6242,7 @@ name = "re_types_builder"
version = "0.20.0-alpha.1+dev"
dependencies = [
"anyhow",
"arrow",
"camino",
"clang-format",
"flatbuffers",
Expand Down
3 changes: 2 additions & 1 deletion crates/build/re_types_builder/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ re_tracing.workspace = true

# External
anyhow.workspace = true
arrow2.workspace = true
arrow.workspace = true
arrow2 = { workspace = true, features = ["arrow"] }
camino.workspace = true
clang-format.workspace = true
flatbuffers.workspace = true
Expand Down
75 changes: 75 additions & 0 deletions crates/build/re_types_builder/src/codegen/docs/arrow_datatype.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
//! Document an Arrow datatype as human-readable markdown.
//!
//! Note that we use the `arrow` library in this module,
//! with just a thin `arrow2` wrapper around it.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just to make it easier to migrate away from arrow2

use arrow::datatypes::DataType;

use crate::codegen::StringExt as _;

pub fn arrow2_datatype_docs(page: &mut String, datatype: &arrow2::datatypes::DataType) {
arrow_datatype_docs(page, 0, &DataType::from(datatype.clone()));
}

/// Appends a human-readable description of `datatype` to `page`.
///
/// * Leaf types (`null`, `boolean`, the integer and float types, `utf8`) are
///   written as a single keyword.
/// * `List` and `FixedSizeList` are written as `List<…>` / `FixedSizeList<N, …>`.
/// * `Struct` and `Union` are written as a braced block with one line per
///   field; each field line is emitted at `indent + 1` and the closing `}` at
///   `indent`.
///
/// `indent` is forwarded to `push_indented`
/// (NOTE(review): presumably a nesting level rather than a column count —
/// confirm against `StringExt`).
///
/// # Panics
///
/// Panics via `unimplemented!` for any datatype that has no formatting rule here.
pub fn arrow_datatype_docs(page: &mut String, indent: usize, datatype: &DataType) {
    // Leaf types map to one fixed keyword each; handle them up front.
    let leaf_name = match datatype {
        DataType::Null => Some("null"),
        DataType::Boolean => Some("boolean"),
        DataType::Int8 => Some("int8"),
        DataType::Int16 => Some("int16"),
        DataType::Int32 => Some("int32"),
        DataType::Int64 => Some("int64"),
        DataType::UInt8 => Some("uint8"),
        DataType::UInt16 => Some("uint16"),
        DataType::UInt32 => Some("uint32"),
        DataType::UInt64 => Some("uint64"),
        DataType::Float16 => Some("float16"),
        DataType::Float32 => Some("float32"),
        DataType::Float64 => Some("float64"),
        DataType::Utf8 => Some("utf8"),
        _ => None,
    };
    if let Some(name) = leaf_name {
        page.push_str(name);
        return;
    }

    // Everything below is a nested type whose children live one level deeper.
    let nested = indent + 1;
    match datatype {
        DataType::List(inner) => {
            page.push_str("List<");
            arrow_datatype_docs(page, nested, inner.data_type());
            page.push('>');
        }
        DataType::FixedSizeList(inner, length) => {
            page.push_str(&format!("FixedSizeList<{length}, "));
            arrow_datatype_docs(page, nested, inner.data_type());
            page.push('>');
        }
        DataType::Struct(fields) => {
            page.push_str("Struct {\n");
            for field in fields {
                page.push_indented(nested, field.name(), 0);
                page.push_str(": ");
                push_field_type_line(page, nested, field);
            }
            page.push_indented(indent, "}", 0);
        }
        DataType::Union(union_fields, union_mode) => {
            let header = match union_mode {
                arrow::datatypes::UnionMode::Sparse => "SparseUnion {\n",
                arrow::datatypes::UnionMode::Dense => "DenseUnion {\n",
            };
            page.push_str(header);
            for (index, field) in union_fields.iter() {
                // Union variants are labelled `<type id> = "<name>": `.
                let label = format!("{index} = {:?}: ", field.name());
                page.push_indented(nested, &label, 0);
                push_field_type_line(page, nested, field);
            }
            page.push_indented(indent, "}", 0);
        }
        _ => {
            unimplemented!(
                "For the docs, you need to implement formatting of arrow datatype {:#?}",
                datatype
            );
        }
    }
}

/// Writes the tail of a struct/union field line: an optional `nullable `
/// marker, the field's datatype rendered at `indent`, and a terminating newline.
fn push_field_type_line(page: &mut String, indent: usize, field: &arrow::datatypes::Field) {
    if field.is_nullable() {
        page.push_str("nullable ");
    }
    arrow_datatype_docs(page, indent, field.data_type());
    page.push('\n');
}
98 changes: 71 additions & 27 deletions crates/build/re_types_builder/src/codegen/docs/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
//! Generate the markdown files shown at <https://rerun.io/docs/reference/types>.
mod arrow_datatype;

use std::{collections::BTreeMap, fmt::Write};

use camino::Utf8PathBuf;
Expand All @@ -14,9 +16,10 @@ use crate::{

pub const DATAFRAME_VIEW_FQNAME: &str = "rerun.blueprint.views.DataframeView";

/// Like [`writeln!`], but without a [`Result`].
///
/// Each arm expands to a unit-valued block, so an invocation can be used as a
/// plain statement without leaving an unused value behind.
///
/// * `putln!(out)` writes just a newline and discards the result.
/// * `putln!(out, "fmt", args…)` writes a formatted line and unwraps the
///   result, so a formatting error panics instead of being silently dropped.
///
/// The call site must have a `write_fmt` provider in scope
/// (e.g. `std::fmt::Write` when writing into a `String`).
macro_rules! putln {
    ($o:ident) => ( { writeln!($o).ok(); } );
    ($o:ident, $($tt:tt)*) => ( { writeln!($o, $($tt)*).unwrap(); } );
}

pub struct DocsCodeGenerator {
Expand Down Expand Up @@ -44,7 +47,7 @@ impl CodeGenerator for DocsCodeGenerator {
&mut self,
reporter: &Reporter,
objects: &Objects,
_arrow_registry: &crate::ArrowRegistry,
arrow_registry: &crate::ArrowRegistry,
) -> GeneratedFiles {
re_tracing::profile_function!();

Expand Down Expand Up @@ -74,7 +77,13 @@ impl CodeGenerator for DocsCodeGenerator {
ObjectKind::View => views.push(object),
}

let page = object_page(reporter, objects, object, &views_per_archetype);
let page = object_page(
reporter,
objects,
object,
arrow_registry,
&views_per_archetype,
);
let path = self.docs_dir.join(format!(
"{}/{}.md",
object.kind.plural_snake_case(),
Expand Down Expand Up @@ -229,6 +238,7 @@ fn object_page(
reporter: &Reporter,
objects: &Objects,
object: &Object,
arrow_registry: &crate::ArrowRegistry,
views_per_archetype: &ViewsPerArchetype,
) -> String {
let is_unreleased = object.is_attr_set(crate::ATTR_DOCS_UNRELEASED);
Expand Down Expand Up @@ -292,6 +302,16 @@ fn object_page(
}
}

if matches!(object.kind, ObjectKind::Datatype | ObjectKind::Component) {
let datatype = &arrow_registry.get(&object.fqname);
putln!(page);
putln!(page, "## Arrow datatype");
putln!(page, "```");
arrow_datatype::arrow2_datatype_docs(&mut page, datatype);
putln!(page);
putln!(page, "```");
}

putln!(page);
putln!(page, "## API reference links");
list_links(is_unreleased, &mut page, object);
Expand Down Expand Up @@ -416,19 +436,20 @@ fn write_fields(objects: &Objects, o: &mut String, object: &Object) {
match ty {
Type::Unit => unreachable!("Should be handled elsewhere"),

Type::UInt8 => atomic("u8"),
Type::UInt16 => atomic("u16"),
Type::UInt32 => atomic("u32"),
Type::UInt64 => atomic("u64"),
Type::Int8 => atomic("i8"),
Type::Int16 => atomic("i16"),
Type::Int32 => atomic("i32"),
Type::Int64 => atomic("i64"),
Type::Bool => atomic("bool"),
Type::Float16 => atomic("f16"),
Type::Float32 => atomic("f32"),
Type::Float64 => atomic("f64"),
Type::String => atomic("string"),
// We use explicit, arrow-like names:
Type::UInt8 => atomic("uint8"),
Type::UInt16 => atomic("uint16"),
Type::UInt32 => atomic("uint32"),
Type::UInt64 => atomic("uint64"),
Type::Int8 => atomic("int8"),
Type::Int16 => atomic("int16"),
Type::Int32 => atomic("int32"),
Type::Int64 => atomic("int64"),
Type::Bool => atomic("boolean"),
Type::Float16 => atomic("float16"),
Type::Float32 => atomic("float32"),
Type::Float64 => atomic("float64"),
Type::String => atomic("utf8"),

Type::Array { elem_type, length } => {
format!(
Expand All @@ -438,7 +459,7 @@ fn write_fields(objects: &Objects, o: &mut String, object: &Object) {
}
Type::Vector { elem_type } => {
format!(
"list of {}",
"List of {}",
type_info(objects, &Type::from(elem_type.clone()))
)
}
Expand All @@ -454,17 +475,41 @@ fn write_fields(objects: &Objects, o: &mut String, object: &Object) {
}
}

if object.is_arrow_transparent() {
debug_assert!(object.is_struct());
debug_assert_eq!(object.fields.len(), 1);
return; // This is just a wrapper type, so don't show the "Fields" section
}

let mut fields = Vec::new();
for field in &object.fields {
if object.is_enum() || field.typ == Type::Unit {
fields.push(format!("* {}", field.name));
} else {
fields.push(format!(
"* {}: {}",
field.name,
type_info(objects, &field.typ)
));
let mut field_string = format!("#### `{}`", field.name);

if let Some(enum_value) = field.enum_value {
field_string.push_str(&format!(" = {enum_value}"));
}
field_string.push('\n');

if !object.is_enum() {
field_string.push_str("Type: ");
if field.typ == Type::Unit {
field_string.push_str("`null`");
} else {
if field.is_nullable {
field_string.push_str("nullable ");
}
field_string.push_str(&type_info(objects, &field.typ));
}
field_string.push('\n');
field_string.push('\n');
}

for line in field.docs.lines_for(objects, Target::WebDocsMarkdown) {
field_string.push_str(&line);
field_string.push('\n');
}

fields.push(field_string);
}

if !fields.is_empty() {
Expand All @@ -473,7 +518,6 @@ fn write_fields(objects: &Objects, o: &mut String, object: &Object) {
crate::ObjectClass::Enum | crate::ObjectClass::Union => "## Variants",
};
putln!(o, "{heading}");
putln!(o);
for field in fields {
putln!(o, "{field}");
}
Expand Down
30 changes: 24 additions & 6 deletions docs/content/reference/types/components/aggregation_policy.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions docs/content/reference/types/components/albedo_factor.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 22 additions & 2 deletions docs/content/reference/types/components/annotation_context.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions docs/content/reference/types/components/axis_length.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions docs/content/reference/types/components/blob.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions docs/content/reference/types/components/class_id.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading