Skip to content

Commit

Permalink
Document arrow datatypes (#7986)
Browse files Browse the repository at this point in the history
  • Loading branch information
emilk authored Nov 4, 2024
1 parent 85d152a commit d1d2db2
Show file tree
Hide file tree
Showing 115 changed files with 1,283 additions and 305 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6251,6 +6251,7 @@ name = "re_types_builder"
version = "0.20.0-alpha.1+dev"
dependencies = [
"anyhow",
"arrow",
"camino",
"clang-format",
"flatbuffers",
Expand Down
3 changes: 2 additions & 1 deletion crates/build/re_types_builder/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ re_tracing.workspace = true

# External
anyhow.workspace = true
arrow2.workspace = true
arrow.workspace = true
arrow2 = { workspace = true, features = ["arrow"] }
camino.workspace = true
clang-format.workspace = true
flatbuffers.workspace = true
Expand Down
75 changes: 75 additions & 0 deletions crates/build/re_types_builder/src/codegen/docs/arrow_datatype.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
//! Document an Arrow datatype as human-readable markdown.
//!
//! Note that we use the `arrow` library in this module,
//! with just a thin `arrow2` wrapper around it.
use arrow::datatypes::DataType;

use crate::codegen::StringExt as _;

pub fn arrow2_datatype_docs(page: &mut String, datatype: &arrow2::datatypes::DataType) {
arrow_datatype_docs(page, 0, &DataType::from(datatype.clone()));
}

/// Append a human-readable description of an `arrow` [`DataType`] to `page`.
///
/// Leaf types are written as a fixed lowercase name (e.g. `uint8`, `utf8`).
/// Lists and fixed-size lists wrap the description of their element type in
/// `List<…>` / `FixedSizeList<N, …>`. Structs and unions are written as a
/// multi-line block with one line per field, where `indent` is the level
/// handed to `push_indented` for the closing brace and `indent + 1` is used
/// for the field lines and for every nested datatype.
///
/// # Panics
///
/// Panics (via `unimplemented!`) for any datatype that has no formatting
/// rule here yet.
pub fn arrow_datatype_docs(page: &mut String, indent: usize, datatype: &DataType) {
    // Leaf types map one-to-one onto a fixed name; resolve those first.
    let leaf_name = match datatype {
        DataType::Null => Some("null"),
        DataType::Boolean => Some("boolean"),
        DataType::Int8 => Some("int8"),
        DataType::Int16 => Some("int16"),
        DataType::Int32 => Some("int32"),
        DataType::Int64 => Some("int64"),
        DataType::UInt8 => Some("uint8"),
        DataType::UInt16 => Some("uint16"),
        DataType::UInt32 => Some("uint32"),
        DataType::UInt64 => Some("uint64"),
        DataType::Float16 => Some("float16"),
        DataType::Float32 => Some("float32"),
        DataType::Float64 => Some("float64"),
        DataType::Utf8 => Some("utf8"),
        _ => None,
    };
    if let Some(name) = leaf_name {
        page.push_str(name);
        return;
    }

    let nested = indent + 1;

    match datatype {
        DataType::List(element) => {
            page.push_str("List<");
            arrow_datatype_docs(page, nested, element.data_type());
            page.push('>');
        }
        DataType::FixedSizeList(element, length) => {
            page.push_str(&format!("FixedSizeList<{length}, "));
            arrow_datatype_docs(page, nested, element.data_type());
            page.push('>');
        }
        DataType::Struct(members) => {
            page.push_str("Struct {\n");
            for member in members {
                page.push_indented(nested, member.name(), 0);
                page.push_str(": ");
                push_field_type_line(page, nested, member);
            }
            page.push_indented(indent, "}", 0);
        }
        DataType::Union(variants, mode) => {
            let header = match mode {
                arrow::datatypes::UnionMode::Sparse => "SparseUnion {\n",
                arrow::datatypes::UnionMode::Dense => "DenseUnion {\n",
            };
            page.push_str(header);
            for (index, variant) in variants.iter() {
                page.push_indented(nested, &format!("{index} = {:?}: ", variant.name()), 0);
                push_field_type_line(page, nested, variant);
            }
            page.push_indented(indent, "}", 0);
        }
        _ => {
            unimplemented!(
                "For the docs, you need to implement formatting of arrow datatype {:#?}",
                datatype
            );
        }
    }
}

/// Write the type part of a struct/union field line: an optional `nullable `
/// prefix, the field's datatype (rendered at level `indent`), and a newline.
fn push_field_type_line(page: &mut String, indent: usize, field: &arrow::datatypes::Field) {
    if field.is_nullable() {
        page.push_str("nullable ");
    }
    arrow_datatype_docs(page, indent, field.data_type());
    page.push('\n');
}
98 changes: 71 additions & 27 deletions crates/build/re_types_builder/src/codegen/docs/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
//! Generate the markdown files shown at <https://rerun.io/docs/reference/types>.
mod arrow_datatype;

use std::{collections::BTreeMap, fmt::Write};

use camino::Utf8PathBuf;
Expand All @@ -14,9 +16,10 @@ use crate::{

pub const DATAFRAME_VIEW_FQNAME: &str = "rerun.blueprint.views.DataframeView";

/// Like [`writeln!`], but without a [`Result`].
macro_rules! putln {
($o:ident) => ( writeln!($o).ok() );
($o:ident, $($tt:tt)*) => ( writeln!($o, $($tt)*).ok() );
($o:ident) => ( { writeln!($o).ok(); } );
($o:ident, $($tt:tt)*) => ( { writeln!($o, $($tt)*).unwrap(); } );
}

pub struct DocsCodeGenerator {
Expand Down Expand Up @@ -44,7 +47,7 @@ impl CodeGenerator for DocsCodeGenerator {
&mut self,
reporter: &Reporter,
objects: &Objects,
_arrow_registry: &crate::ArrowRegistry,
arrow_registry: &crate::ArrowRegistry,
) -> GeneratedFiles {
re_tracing::profile_function!();

Expand Down Expand Up @@ -74,7 +77,13 @@ impl CodeGenerator for DocsCodeGenerator {
ObjectKind::View => views.push(object),
}

let page = object_page(reporter, objects, object, &views_per_archetype);
let page = object_page(
reporter,
objects,
object,
arrow_registry,
&views_per_archetype,
);
let path = self.docs_dir.join(format!(
"{}/{}.md",
object.kind.plural_snake_case(),
Expand Down Expand Up @@ -229,6 +238,7 @@ fn object_page(
reporter: &Reporter,
objects: &Objects,
object: &Object,
arrow_registry: &crate::ArrowRegistry,
views_per_archetype: &ViewsPerArchetype,
) -> String {
let is_unreleased = object.is_attr_set(crate::ATTR_DOCS_UNRELEASED);
Expand Down Expand Up @@ -292,6 +302,16 @@ fn object_page(
}
}

if matches!(object.kind, ObjectKind::Datatype | ObjectKind::Component) {
let datatype = &arrow_registry.get(&object.fqname);
putln!(page);
putln!(page, "## Arrow datatype");
putln!(page, "```");
arrow_datatype::arrow2_datatype_docs(&mut page, datatype);
putln!(page);
putln!(page, "```");
}

putln!(page);
putln!(page, "## API reference links");
list_links(is_unreleased, &mut page, object);
Expand Down Expand Up @@ -416,19 +436,20 @@ fn write_fields(objects: &Objects, o: &mut String, object: &Object) {
match ty {
Type::Unit => unreachable!("Should be handled elsewhere"),

Type::UInt8 => atomic("u8"),
Type::UInt16 => atomic("u16"),
Type::UInt32 => atomic("u32"),
Type::UInt64 => atomic("u64"),
Type::Int8 => atomic("i8"),
Type::Int16 => atomic("i16"),
Type::Int32 => atomic("i32"),
Type::Int64 => atomic("i64"),
Type::Bool => atomic("bool"),
Type::Float16 => atomic("f16"),
Type::Float32 => atomic("f32"),
Type::Float64 => atomic("f64"),
Type::String => atomic("string"),
// We use explicit, arrow-like names:
Type::UInt8 => atomic("uint8"),
Type::UInt16 => atomic("uint16"),
Type::UInt32 => atomic("uint32"),
Type::UInt64 => atomic("uint64"),
Type::Int8 => atomic("int8"),
Type::Int16 => atomic("int16"),
Type::Int32 => atomic("int32"),
Type::Int64 => atomic("int64"),
Type::Bool => atomic("boolean"),
Type::Float16 => atomic("float16"),
Type::Float32 => atomic("float32"),
Type::Float64 => atomic("float64"),
Type::String => atomic("utf8"),

Type::Array { elem_type, length } => {
format!(
Expand All @@ -438,7 +459,7 @@ fn write_fields(objects: &Objects, o: &mut String, object: &Object) {
}
Type::Vector { elem_type } => {
format!(
"list of {}",
"List of {}",
type_info(objects, &Type::from(elem_type.clone()))
)
}
Expand All @@ -454,17 +475,41 @@ fn write_fields(objects: &Objects, o: &mut String, object: &Object) {
}
}

if object.is_arrow_transparent() {
debug_assert!(object.is_struct());
debug_assert_eq!(object.fields.len(), 1);
return; // This is just a wrapper type, so don't show the "Fields" section
}

let mut fields = Vec::new();
for field in &object.fields {
if object.is_enum() || field.typ == Type::Unit {
fields.push(format!("* {}", field.name));
} else {
fields.push(format!(
"* {}: {}",
field.name,
type_info(objects, &field.typ)
));
let mut field_string = format!("#### `{}`", field.name);

if let Some(enum_value) = field.enum_value {
field_string.push_str(&format!(" = {enum_value}"));
}
field_string.push('\n');

if !object.is_enum() {
field_string.push_str("Type: ");
if field.typ == Type::Unit {
field_string.push_str("`null`");
} else {
if field.is_nullable {
field_string.push_str("nullable ");
}
field_string.push_str(&type_info(objects, &field.typ));
}
field_string.push('\n');
field_string.push('\n');
}

for line in field.docs.lines_for(objects, Target::WebDocsMarkdown) {
field_string.push_str(&line);
field_string.push('\n');
}

fields.push(field_string);
}

if !fields.is_empty() {
Expand All @@ -473,7 +518,6 @@ fn write_fields(objects: &Objects, o: &mut String, object: &Object) {
crate::ObjectClass::Enum | crate::ObjectClass::Union => "## Variants",
};
putln!(o, "{heading}");
putln!(o);
for field in fields {
putln!(o, "{field}");
}
Expand Down
30 changes: 24 additions & 6 deletions docs/content/reference/types/components/aggregation_policy.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions docs/content/reference/types/components/albedo_factor.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 22 additions & 2 deletions docs/content/reference/types/components/annotation_context.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions docs/content/reference/types/components/axis_length.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions docs/content/reference/types/components/blob.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions docs/content/reference/types/components/class_id.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit d1d2db2

Please sign in to comment.