rust_data_processing/ingestion/
builder.rs

1use std::sync::Arc;
2
3use crate::error::IngestionResult;
4use crate::types::{DataSet, Schema};
5
6use super::observability::IngestionObserver;
7use super::observability::IngestionSeverity;
8use super::unified::{ExcelSheetSelection, IngestionFormat, IngestionOptions, ingest_from_path};
9
10/// Builder for [`IngestionOptions`].
11///
12/// Prefer this over constructing [`IngestionOptions`] directly when you want to:
13/// - avoid long struct literals in user code
14/// - keep configuration engine-agnostic (no Polars/DataFusion types leak into signatures)
15/// - lean on sensible defaults and override only what you need
16#[derive(Debug, Clone)]
17pub struct IngestionOptionsBuilder {
18    options: IngestionOptions,
19}
20
21impl Default for IngestionOptionsBuilder {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27impl IngestionOptionsBuilder {
28    /// Create a builder with [`IngestionOptions::default`] values.
29    pub fn new() -> Self {
30        Self {
31            options: IngestionOptions::default(),
32        }
33    }
34
35    /// Force a specific ingestion format (otherwise inferred from path extension).
36    pub fn format(mut self, format: IngestionFormat) -> Self {
37        self.options.format = Some(format);
38        self
39    }
40
41    /// Configure Excel sheet selection.
42    pub fn excel_sheet_selection(mut self, sel: ExcelSheetSelection) -> Self {
43        self.options.excel_sheet_selection = sel;
44        self
45    }
46
47    /// Configure an observer for success/failure/alerts.
48    pub fn observer(mut self, observer: Arc<dyn IngestionObserver>) -> Self {
49        self.options.observer = Some(observer);
50        self
51    }
52
53    /// Configure the severity threshold at which `on_alert` is invoked.
54    pub fn alert_at_or_above(mut self, sev: IngestionSeverity) -> Self {
55        self.options.alert_at_or_above = sev;
56        self
57    }
58
59    /// Build the configured [`IngestionOptions`].
60    pub fn build(self) -> IngestionOptions {
61        self.options
62    }
63
64    /// Convenience: ingest using the configured options.
65    pub fn ingest_from_path(
66        self,
67        path: impl AsRef<std::path::Path>,
68        schema: &Schema,
69    ) -> IngestionResult<DataSet> {
70        let opts = self.build();
71        ingest_from_path(path, schema, &opts)
72    }
73}
74
#[cfg(test)]
mod tests {
    use super::IngestionOptionsBuilder;
    use crate::ingestion::{
        ExcelSheetSelection, IngestionFormat, IngestionOptions, IngestionSeverity,
    };

    /// An untouched builder must produce the same values as `IngestionOptions::default()`.
    #[test]
    fn builder_defaults_match_ingestion_options_default() {
        let expected = IngestionOptions::default();
        let actual = IngestionOptionsBuilder::new().build();

        assert_eq!(actual.format, expected.format);
        assert_eq!(actual.excel_sheet_selection, expected.excel_sheet_selection);
        assert_eq!(actual.alert_at_or_above, expected.alert_at_or_above);
        // Only presence of an observer is compared, not the observer itself.
        assert_eq!(actual.observer.is_some(), expected.observer.is_some());
    }

    /// Each setter must be reflected in the built options.
    #[test]
    fn builder_sets_fields() {
        let builder = IngestionOptionsBuilder::new();
        let builder = builder.format(IngestionFormat::Csv);
        let builder = builder.excel_sheet_selection(ExcelSheetSelection::AllSheets);
        let builder = builder.alert_at_or_above(IngestionSeverity::Error);
        let opts = builder.build();

        assert_eq!(opts.format, Some(IngestionFormat::Csv));
        assert_eq!(opts.excel_sheet_selection, ExcelSheetSelection::AllSheets);
        assert_eq!(opts.alert_at_or_above, IngestionSeverity::Error);
    }
}