From 780062f4a81a7b694cfb3f9bd604e6358e0b26f6 Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 10 Oct 2024 11:40:29 -0700 Subject: [PATCH] WIP. Test gen doc from macros. to_date --- datafusion/functions/src/datetime/to_date.rs | 39 +++++ datafusion/functions/src/math/log.rs | 25 +-- datafusion/macros/Cargo.toml | 2 +- datafusion/macros/src/lib.rs | 79 +++++++++- datafusion/pre-macros/src/lib.rs | 157 ++++++++++++++++++- 5 files changed, 275 insertions(+), 27 deletions(-) diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs index 2803fd042b99..eee42cf2a543 100644 --- a/datafusion/functions/src/datetime/to_date.rs +++ b/datafusion/functions/src/datetime/to_date.rs @@ -26,9 +26,42 @@ use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, }; +use datafusion_macros::udf_doc; use std::any::Any; use std::sync::OnceLock; +#[udf_doc( + doc_section(include = "true", label = "Time and Date Functions"), + description = r"Converts a value to a date (`YYYY-MM-DD`). +Supports strings, integer and double types as input. +Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. +Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`). +Returns the corresponding date. +Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. 
The largest supported date value is `9999-12-31`.", + syntax_example = "to_date('2017-05-31', '%Y-%m-%d')", + sql_example = "```sql\n\ +> select to_date('2023-01-31');\n\ ++-----------------------------+\n\ +| to_date(Utf8(\"2023-01-31\")) |\n\ ++-----------------------------+\n\ +| 2023-01-31 |\n\ ++-----------------------------+\n\ +> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');\n\ ++---------------------------------------------------------------+\n\ +| to_date(Utf8(\"2023/01/31\"),Utf8(\"%Y-%m-%d\"),Utf8(\"%Y/%m/%d\")) |\n\ ++---------------------------------------------------------------+\n\ +| 2023-01-31 |\n\ ++---------------------------------------------------------------+\n\ +```\n\n\ +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs)", + standard_argument(name = "expression", expression_type = "String"), + argument( + name = "format_n", + description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order + they appear with the first successful one being returned. If none of the formats successfully parse the expression + an error will be returned." + ) +)] #[derive(Debug)] pub struct ToDateFunc { signature: Signature, @@ -376,4 +409,10 @@ mod tests { ); } } + + #[test] + fn test_doc() { + let dt = ToDateFunc::new(); + println!("{:?}", dt.documentation_test()); + } } diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 19735bca7c49..889e3761d26c 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -17,7 +17,6 @@ //! Math function: `log()`. 
-use datafusion_macros::udf_doc; use std::any::Any; use std::sync::{Arc, OnceLock}; @@ -38,7 +37,6 @@ use datafusion_expr::{ }; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; -#[udf_doc(description = "log_description", example = "log_example")] #[derive(Debug)] pub struct LogFunc { signature: Signature, @@ -59,8 +57,10 @@ fn get_log_doc() -> &'static Documentation { .with_description("Returns the base-x logarithm of a number. Can either provide a specified base, or if omitted then takes the base-10 of a number.") .with_syntax_example(r#"log(base, numeric_expression) log(numeric_expression)"#) - .with_standard_argument("base", "Base numeric") - .with_standard_argument("numeric_expression", "Numeric") + .with_argument("base", + "Base numeric expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.") + .with_argument("numeric_expression", + "Numeric expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.") .build() .unwrap() }) @@ -185,6 +185,10 @@ impl ScalarUDFImpl for LogFunc { Ok(ColumnarValue::Array(arr)) } + fn documentation(&self) -> Option<&Documentation> { + Some(get_log_doc()) + } + /// Simplify the `log` function by the relevant rules: /// 1. Log(a, 1) ===> 0 /// 2. 
Log(a, Power(a, b)) ===> b @@ -262,7 +266,6 @@ mod tests { use datafusion_common::DFSchema; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::simplify::SimplifyContext; - use datafusion_pre_macros::DocumentationTest; #[test] fn test_log_f64() { @@ -471,16 +474,4 @@ mod tests { SortProperties::Unordered ); } - - #[test] - fn test_doc() { - let log = LogFunc::new(); - assert_eq!( - log.documentation_test(), - Some(DocumentationTest { - description: "log_description".to_string(), - syntax_example: "log_example".to_string(), - }) - ); - } } diff --git a/datafusion/macros/Cargo.toml b/datafusion/macros/Cargo.toml index 5d0fd00198bb..abe188d88aae 100644 --- a/datafusion/macros/Cargo.toml +++ b/datafusion/macros/Cargo.toml @@ -37,7 +37,7 @@ proc-macro = true [dependencies] quote = "1.0.37" -syn = "2.0.79" +syn = { version = "2.0.79", features = ["full"] } proc-macro2 = "1.0" datafusion-pre-macros = { workspace = true } diff --git a/datafusion/macros/src/lib.rs b/datafusion/macros/src/lib.rs index f512d54befa5..ef9ac131ab67 100644 --- a/datafusion/macros/src/lib.rs +++ b/datafusion/macros/src/lib.rs @@ -22,41 +22,104 @@ use syn::{parse_macro_input, DeriveInput, LitStr}; #[proc_macro_attribute] pub fn udf_doc(args: TokenStream, input: TokenStream) -> TokenStream { + let mut doc_section_include: Option<LitStr> = None; + let mut doc_section_lbl: Option<LitStr> = None; + let mut doc_section_desc: Option<LitStr> = None; + let mut description: Option<LitStr> = None; - let mut example: Option<LitStr> = None; + let mut syntax_example: Option<LitStr> = None; + let mut sql_example: Option<LitStr> = None; + let mut standard_args: Vec<(Option<LitStr>, Option<LitStr>)> = vec![]; + let mut udf_args: Vec<(Option<LitStr>, Option<LitStr>)> = vec![]; let parser = syn::meta::parser(|meta| { - if meta.path.is_ident("description") { + if meta.path.is_ident("doc_section") { + meta.parse_nested_meta(|meta| { + //dbg!(meta.path); + if meta.path.is_ident("include") { + doc_section_include = meta.value()?.parse()?; + return Ok(()); + } else if 
meta.path.is_ident("label") { + doc_section_lbl = meta.value()?.parse()?; + return Ok(()); + } else if meta.path.is_ident("description") { + doc_section_desc = meta.value()?.parse()?; + return Ok(()); + } + Ok(()) + }) + } else if meta.path.is_ident("description") { description = Some(meta.value()?.parse()?); Ok(()) - } else if meta.path.is_ident("example") { - example = Some(meta.value()?.parse()?); + } else if meta.path.is_ident("syntax_example") { + syntax_example = Some(meta.value()?.parse()?); + Ok(()) + } else if meta.path.is_ident("sql_example") { + sql_example = Some(meta.value()?.parse()?); Ok(()) + } else if meta.path.is_ident("standard_argument") { + let mut standard_arg: (Option<LitStr>, Option<LitStr>) = (None, None); + meta.parse_nested_meta(|meta| { + if meta.path.is_ident("name") { + standard_arg.0 = meta.value()?.parse()?; + return Ok(()); + } else if meta.path.is_ident("expression_type") { + standard_arg.1 = meta.value()?.parse()?; + return Ok(()); + } + standard_args.push(standard_arg.clone()); + Ok(()) + }) + } else if meta.path.is_ident("argument") { + let mut arg: (Option<LitStr>, Option<LitStr>) = (None, None); + meta.parse_nested_meta(|meta| { + if meta.path.is_ident("name") { + arg.0 = meta.value()?.parse()?; + return Ok(()); + } else if meta.path.is_ident("description") { + arg.1 = meta.value()?.parse()?; + return Ok(()); + } + udf_args.push(arg.clone()); + Ok(()) + }) } else { Err(meta.error("unsupported property")) } }); parse_macro_input!(args with parser); - eprintln!("description={description:?} example={example:?}"); // Parse the input struct let input = parse_macro_input!(input as DeriveInput); let name = input.clone().ident; - //eprintln!("input={input:?}"); + eprintln!("doc_section_include=cc{doc_section_include:?}cc"); + let doc_section_include: bool = doc_section_include.unwrap().value().parse().unwrap(); let expanded = quote! 
{ #input use datafusion_pre_macros::DocumentationTest; + use datafusion_pre_macros::DocSectionTest; + use datafusion_pre_macros::DocumentationBuilderTest; + + static DOCUMENTATION_TEST: OnceLock<DocumentationTest> = OnceLock::new(); impl #name { - fn documentation_test(&self) -> Option<DocumentationTest> { - Some(DocumentationTest { description: #description.to_string(), syntax_example: #example.to_string() }) + fn documentation_test(&self) -> Option<&DocumentationTest> { + Some(DOCUMENTATION_TEST.get_or_init(|| { + DocumentationTest::builder() + .with_doc_section(DocSectionTest { include: #doc_section_include, label: #doc_section_lbl, description: Some("") }) + .with_description(#description.to_string()) + .with_syntax_example(#syntax_example.to_string()) + .build() + })) } } }; + eprintln!("{}", expanded); + // Return the generated code TokenStream::from(expanded) } diff --git a/datafusion/pre-macros/src/lib.rs b/datafusion/pre-macros/src/lib.rs index 6d54de7388dd..0ff0d36a0e75 100644 --- a/datafusion/pre-macros/src/lib.rs +++ b/datafusion/pre-macros/src/lib.rs @@ -15,10 +15,165 @@ // specific language governing permissions and limitations // under the License. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone)] pub struct DocumentationTest { + /// the section in the documentation where the UDF will be documented + pub doc_section: DocSectionTest, /// the description for the UDF pub description: String, /// a brief example of the syntax. For example "ascii(str)" pub syntax_example: String, + /// a sql example for the UDF, usually in the form of a sql prompt + /// query and output. It is strongly recommended to provide an + /// example for anything but the most basic UDF's + pub sql_example: Option<String>, + /// arguments for the UDF which will be displayed in array order. + /// Left member of a pair is the argument name, right is a + /// description for the argument + pub arguments: Option<Vec<(String, String)>>, + /// related functions if any. Values should match the related + /// udf's name exactly. 
Related udf's must be of the same + /// UDF type (scalar, aggregate or window) for proper linking to + /// occur + pub related_udfs: Option<Vec<String>>, +} + +impl DocumentationTest { + /// Returns a new [`DocumentationBuilderTest`] with no options set. + pub fn builder() -> DocumentationBuilderTest { + DocumentationBuilderTest::new() + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct DocSectionTest { + /// true to include this doc section in the public + /// documentation, false otherwise + pub include: bool, + /// a display label for the doc section. For example: "Math Expressions" + pub label: &'static str, + /// an optional description for the doc section + pub description: Option<&'static str>, +} + +pub struct DocumentationBuilderTest { + pub doc_section: Option<DocSectionTest>, + pub description: Option<String>, + pub syntax_example: Option<String>, + pub sql_example: Option<String>, + pub arguments: Option<Vec<(String, String)>>, + pub related_udfs: Option<Vec<String>>, +} + +impl DocumentationBuilderTest { + pub fn new() -> Self { + Self { + doc_section: None, + description: None, + syntax_example: None, + sql_example: None, + arguments: None, + related_udfs: None, + } + } + + pub fn with_doc_section(mut self, doc_section: DocSectionTest) -> Self { + self.doc_section = Some(doc_section); + self + } + + pub fn with_description(mut self, description: impl Into<String>) -> Self { + self.description = Some(description.into()); + self + } + + pub fn with_syntax_example(mut self, syntax_example: impl Into<String>) -> Self { + self.syntax_example = Some(syntax_example.into()); + self + } + + pub fn with_sql_example(mut self, sql_example: impl Into<String>) -> Self { + self.sql_example = Some(sql_example.into()); + self + } + + /// Adds documentation for a specific argument to the documentation. + /// + /// Arguments are displayed in the order they are added. 
+ pub fn with_argument( + mut self, + arg_name: impl Into<String>, + arg_description: impl Into<String>, + ) -> Self { + let mut args = self.arguments.unwrap_or_default(); + args.push((arg_name.into(), arg_description.into())); + self.arguments = Some(args); + self + } + + /// Add a standard "expression" argument to the documentation + /// + /// This is similar to [`Self::with_argument`] except that a standard + /// description is appended to the end: `"Can be a constant, column, or + /// function, and any combination of operators."` + /// + /// The argument is rendered like + /// + /// ```text + /// <arg_name>: + /// <expression_type> expression to operate on. Can be a constant, column, or function, and any combination of operators. + /// ``` + pub fn with_standard_argument( + self, + arg_name: impl Into<String>, + expression_type: impl AsRef<str>, + ) -> Self { + let expression_type = expression_type.as_ref(); + self.with_argument(arg_name, format!("{expression_type} expression to operate on. Can be a constant, column, or function, and any combination of operators.")) + } + + pub fn with_related_udf(mut self, related_udf: impl Into<String>) -> Self { + let mut related = self.related_udfs.unwrap_or_default(); + related.push(related_udf.into()); + self.related_udfs = Some(related); + self + } + + pub fn build(self) -> DocumentationTest { + let Self { + doc_section, + description, + syntax_example, + sql_example, + arguments, + related_udfs, + } = self; + + if doc_section.is_none() { + panic!("Documentation must have a doc section"); + } + + if description.is_none() { + panic!("Documentation must have a description"); + } + + if syntax_example.is_none() { + panic!("Documentation must have a syntax_example"); + } + + DocumentationTest { + doc_section: doc_section.unwrap(), + description: description.unwrap(), + syntax_example: syntax_example.unwrap(), + sql_example, + arguments, + related_udfs, + } + } +} + +impl Default for DocumentationBuilderTest { + fn default() -> Self { + Self::new() + } }