Skip to content

Commit

Permalink
chore: add docs, part of #37
Browse files Browse the repository at this point in the history
- add pragma `#![warn(missing_docs)]` to `arrow-buffer`, `arrow-cast`, `arrow-csv`
- add docs to the same to remove lint warnings
  • Loading branch information
ByteBaker committed Sep 20, 2024
1 parent ddf19d3 commit f58e8f1
Show file tree
Hide file tree
Showing 12 changed files with 69 additions and 9 deletions.
1 change: 1 addition & 0 deletions arrow-buffer/src/bigint/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ impl i256 {
}
}

/// Create an integer value from its 128-bit counterpart.
///
/// The second value handed to `from_parts` is `v >> 127`, an arithmetic shift
/// that is `0` for non-negative `v` and `-1` (all bits set) for negative `v`.
pub const fn from_i128(v: i128) -> Self {
    let bits = v as u128;
    let sign_fill = v >> 127;
    Self::from_parts(bits, sign_fill)
}
Expand Down
9 changes: 4 additions & 5 deletions arrow-buffer/src/builder/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ impl NullBufferBuilder {
}
}

/// Return a mutable view of the inner bitmap slice.
///
/// Yields `None` when no bitmap builder is currently held.
pub fn as_slice_mut(&mut self) -> Option<&mut [u8]> {
    let builder = self.bitmap_builder.as_mut()?;
    Some(builder.as_slice_mut())
}
Expand All @@ -173,14 +174,12 @@ impl NullBufferBuilder {
}

impl NullBufferBuilder {
/// Return the number of bits in the buffer.
///
/// Uses the bitmap builder's length when one is present; otherwise falls back
/// to the `len` counter stored on this builder.
pub fn len(&self) -> usize {
    // Single tail expression: an `if let … else` computing the same value must
    // not precede it as a statement (it would be redundant and would not type-check).
    self.bitmap_builder.as_ref().map_or(self.len, |b| b.len())
}

/// Check whether the builder currently holds zero bits.
pub fn is_empty(&self) -> bool {
    matches!(self.len(), 0)
}
Expand Down
2 changes: 1 addition & 1 deletion arrow-buffer/src/builder/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ use std::ops::Deref;

use crate::{ArrowNativeType, OffsetBuffer};

/// Builder of [`OffsetBuffer`]
///
/// Generic over the offset type `O`, which must implement [`ArrowNativeType`].
#[derive(Debug)]
pub struct OffsetBufferBuilder<O: ArrowNativeType> {
// NOTE(review): presumably the offsets accumulated so far — confirm against the impl
offsets: Vec<O>,
// NOTE(review): presumably the most recent offset, tracked as `usize` — confirm against the impl
last_offset: usize,
}

/// Builder of [`OffsetBuffer`]
impl<O: ArrowNativeType> OffsetBufferBuilder<O> {
/// Create a new builder with space for `capacity + 1` offsets
pub fn new(capacity: usize) -> Self {
Expand Down
5 changes: 5 additions & 0 deletions arrow-buffer/src/interval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,11 @@ use std::ops::Neg;
// The derived `Ord`/`PartialOrd` compare fields lexicographically in declaration
// order: `months`, then `days`, then `nanoseconds`.
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
// `repr(C)` lays the fields out in declaration order.
#[repr(C)]
pub struct IntervalMonthDayNano {
/// Number of months (signed 32-bit)
pub months: i32,
/// Number of days (signed 32-bit)
pub days: i32,
/// Number of nanoseconds (signed 64-bit)
pub nanoseconds: i64,
}

Expand Down Expand Up @@ -345,7 +348,9 @@ derive_arith!(
// The derived `Ord`/`PartialOrd` compare fields lexicographically in declaration
// order: `days`, then `milliseconds`.
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
// `repr(C)` lays the fields out in declaration order.
#[repr(C)]
pub struct IntervalDayTime {
/// Number of days (signed 32-bit)
pub days: i32,
/// Number of milliseconds (signed 32-bit)
pub milliseconds: i32,
}

Expand Down
1 change: 1 addition & 0 deletions arrow-buffer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

// used by [`buffer::mutable::dangling_ptr`]
#![cfg_attr(miri, feature(strict_provenance))]
#![warn(missing_docs)]

pub mod alloc;
pub mod buffer;
Expand Down
9 changes: 9 additions & 0 deletions arrow-buffer/src/util/bit_chunk_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,26 +131,32 @@ impl<'a> UnalignedBitChunk<'a> {
}
}

/// Returns the number of leading padding bits
pub fn lead_padding(&self) -> usize {
self.lead_padding
}

/// Returns the number of trailing padding bits
pub fn trailing_padding(&self) -> usize {
self.trailing_padding
}

/// Returns the prefix, if any
pub fn prefix(&self) -> Option<u64> {
self.prefix
}

/// Returns the suffix, if any
pub fn suffix(&self) -> Option<u64> {
self.suffix
}

/// Returns reference to the chunks
pub fn chunks(&self) -> &'a [u64] {
self.chunks
}

/// Returns an iterator over the chunks
pub fn iter(&self) -> UnalignedBitChunkIterator<'a> {
self.prefix
.into_iter()
Expand All @@ -164,6 +170,7 @@ impl<'a> UnalignedBitChunk<'a> {
}
}

/// Iterator over an [`UnalignedBitChunk`]
pub type UnalignedBitChunkIterator<'a> = std::iter::Chain<
std::iter::Chain<std::option::IntoIter<u64>, std::iter::Cloned<std::slice::Iter<'a, u64>>>,
std::option::IntoIter<u64>,
Expand Down Expand Up @@ -212,6 +219,7 @@ pub struct BitChunks<'a> {
}

impl<'a> BitChunks<'a> {
/// Create a new [`BitChunks`] from a byte array, and an offset and length in bits
pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self {
assert!(ceil(offset + len, 8) <= buffer.len() * 8);

Expand All @@ -232,6 +240,7 @@ impl<'a> BitChunks<'a> {
}
}

/// Iterator over chunks of 64 bits represented as a u64
#[derive(Debug)]
pub struct BitChunkIterator<'a> {
buffer: &'a [u8],
Expand Down
1 change: 1 addition & 0 deletions arrow-cast/src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ impl<'a> Default for FormatOptions<'a> {
}

impl<'a> FormatOptions<'a> {
/// Creates a new set of format options
pub const fn new() -> Self {
Self {
safe: true,
Expand Down
2 changes: 2 additions & 0 deletions arrow-cast/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
// under the License.

//! Functions for converting from one data type to another in [Apache Arrow](https://docs.rs/arrow)

#![warn(missing_docs)]
pub mod cast;
pub use cast::*;
pub mod display;
Expand Down
29 changes: 28 additions & 1 deletion arrow-cast/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,8 +432,12 @@ fn string_to_time(s: &str) -> Option<NaiveTime> {
/// assert_eq!(ts, 1609459200123456789);
/// ```
pub trait Parser: ArrowPrimitiveType {
/// Parse `string` into a value of this type's native representation
// NOTE(review): presumably `None` signals input that could not be parsed — confirm with implementors
fn parse(string: &str) -> Option<Self::Native>;

/// Parse `string` into the native type, guided by a format string
///
/// This default implementation ignores the format string and simply forwards
/// to [`Self::parse`]; implementors override it to honour `_format`.
fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
Self::parse(string)
}
Expand Down Expand Up @@ -966,6 +970,7 @@ pub fn parse_decimal<T: DecimalType>(
})
}

/// Parse human-readable interval string to Arrow [IntervalYearMonthType]
pub fn parse_interval_year_month(
value: &str,
) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
Expand All @@ -981,6 +986,7 @@ pub fn parse_interval_year_month(
Ok(IntervalYearMonthType::make_value(0, months))
}

/// Parse human-readable interval string to Arrow [IntervalDayTimeType]
pub fn parse_interval_day_time(
value: &str,
) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
Expand All @@ -994,6 +1000,7 @@ pub fn parse_interval_day_time(
Ok(IntervalDayTimeType::make_value(days, millis))
}

/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
pub fn parse_interval_month_day_nano_config(
value: &str,
config: IntervalParseConfig,
Expand All @@ -1005,6 +1012,7 @@ pub fn parse_interval_month_day_nano_config(
Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
}

/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
pub fn parse_interval_month_day_nano(
value: &str,
) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
Expand All @@ -1018,14 +1026,18 @@ const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
// 24 times `NANOS_PER_HOUR`; compiled only in test builds.
#[cfg(test)]
const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR;

/// Config to parse interval strings
///
/// Currently stores the `default_unit` to use if the string doesn't have one specified
#[derive(Debug, Clone)]
pub struct IntervalParseConfig {
/// The default unit to use if none is specified
/// e.g. `INTERVAL 1` represents `INTERVAL 1 SECOND` when default_unit = [`IntervalUnit::Second`]
default_unit: IntervalUnit,
}

impl IntervalParseConfig {
/// Create a new [IntervalParseConfig] with the given default unit
pub fn new(default_unit: IntervalUnit) -> Self {
Self { default_unit }
}
Expand All @@ -1034,18 +1046,32 @@ impl IntervalParseConfig {
/// Represents the units of an interval, with each variant
/// corresponding to a bit in the interval's bitfield representation
///
/// Every discriminant is a distinct single-bit value (bits 0 through 11),
/// stored as a `u16`.
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum IntervalUnit {
/// A Century (bit 0)
Century = 0b_0000_0000_0001,
/// A Decade (bit 1)
Decade = 0b_0000_0000_0010,
/// A Year (bit 2)
Year = 0b_0000_0000_0100,
/// A Month (bit 3)
Month = 0b_0000_0000_1000,
/// A Week (bit 4)
Week = 0b_0000_0001_0000,
/// A Day (bit 5)
Day = 0b_0000_0010_0000,
/// An Hour (bit 6)
Hour = 0b_0000_0100_0000,
/// A Minute (bit 7)
Minute = 0b_0000_1000_0000,
/// A Second (bit 8)
Second = 0b_0001_0000_0000,
/// A Millisecond (bit 9)
Millisecond = 0b_0010_0000_0000,
/// A Microsecond (bit 10)
Microsecond = 0b_0100_0000_0000,
/// A Nanosecond (bit 11)
Nanosecond = 0b_1000_0000_0000,
}

Expand Down Expand Up @@ -1093,6 +1119,7 @@ impl IntervalUnit {
}
}

/// A tuple representing `(months, days, nanoseconds)` in an interval,
/// typed as `(i32, i32, i64)`
pub type MonthDayNano = (i32, i32, i64);

/// Chosen based on the number of decimal digits in 1 week in nanoseconds
Expand Down
3 changes: 2 additions & 1 deletion arrow-cast/src/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ pub fn pretty_format_columns(
pretty_format_columns_with_options(col_name, results, &options)
}

pub fn pretty_format_columns_with_options(
/// Utility function to create a visual representation of columns with options
fn pretty_format_columns_with_options(
col_name: &str,
results: &[ArrayRef],
options: &FormatOptions,
Expand Down
2 changes: 2 additions & 0 deletions arrow-csv/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

//! Transfer data between the Arrow memory format and CSV (comma-separated values).

#![warn(missing_docs)]

pub mod reader;
pub mod writer;

Expand Down
14 changes: 13 additions & 1 deletion arrow-csv/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,31 +236,39 @@ pub struct Format {
}

impl Format {
/// Specify whether the CSV file has a header
///
/// When `true`, the first row of the CSV file is treated as a header row
// NOTE(review): the earlier text claimed a default of `true`; the default comes from
// how `Format` is constructed, which is not visible here — confirm before restating it.
pub fn with_header(mut self, has_header: bool) -> Self {
    let flag = &mut self.header;
    *flag = has_header;
    self
}

/// Use `delimiter` as the field delimiter byte instead of the default comma `','`
pub fn with_delimiter(mut self, delimiter: u8) -> Self {
    let slot = &mut self.delimiter;
    *slot = Some(delimiter);
    self
}

/// Use `escape` as the escape byte
pub fn with_escape(mut self, escape: u8) -> Self {
    let slot = &mut self.escape;
    *slot = Some(escape);
    self
}

/// Use `quote` as the quote byte instead of the default double quote `'"'`
pub fn with_quote(mut self, quote: u8) -> Self {
    let slot = &mut self.quote;
    *slot = Some(quote);
    self
}

/// Use `terminator` as the record terminator byte instead of the default CRLF
pub fn with_terminator(mut self, terminator: u8) -> Self {
    let slot = &mut self.terminator;
    *slot = Some(terminator);
    self
}

/// Specify a comment character, lines starting with this character will be ignored
pub fn with_comment(mut self, comment: u8) -> Self {
self.comment = Some(comment);
self
Expand Down Expand Up @@ -1100,7 +1108,7 @@ impl ReaderBuilder {
self
}

/// Overrides the [`Format`] of this [`ReaderBuilder`]
pub fn with_format(mut self, format: Format) -> Self {
self.format = format;
self
Expand All @@ -1112,21 +1120,25 @@ impl ReaderBuilder {
self
}

/// Use `escape` as the CSV file's escape character
///
/// Stored on the builder's inner format.
pub fn with_escape(mut self, escape: u8) -> Self {
    let format = &mut self.format;
    format.escape = Some(escape);
    self
}

/// Use `quote` as the CSV file's quote character; by default it is double quote
///
/// Stored on the builder's inner format.
pub fn with_quote(mut self, quote: u8) -> Self {
    let format = &mut self.format;
    format.quote = Some(quote);
    self
}

/// Use `terminator` as the record terminator character instead of the default CRLF
///
/// Stored on the builder's inner format.
pub fn with_terminator(mut self, terminator: u8) -> Self {
    let format = &mut self.format;
    format.terminator = Some(terminator);
    self
}

/// Provide a comment character, lines starting with this character will be ignored
pub fn with_comment(mut self, comment: u8) -> Self {
self.format.comment = Some(comment);
self
Expand Down

0 comments on commit f58e8f1

Please sign in to comment.