rust_data_processing/ingestion/
mod.rs

1//! Ingestion entrypoints and implementations.
2//!
3//! Most callers should use [`ingest_from_path`] (from [`unified`]) which:
4//!
5//! - auto-detects format by file extension (or you can override via [`IngestionOptions`])
6//! - performs ingestion into an in-memory [`crate::types::DataSet`]
7//! - optionally reports success/failure/alerts to an [`IngestionObserver`]
8//!
9//! For ergonomic configuration, prefer [`IngestionOptionsBuilder`] over constructing
10//! [`IngestionOptions`] directly.
11//!
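//! Format-specific functions are also available under:
//! - [`csv`]
//! - [`excel`] (stubs that always return an error unless the `excel` feature is enabled)
//! - [`json`]
//! - [`parquet`]
//!
//! Direct database ingestion lives under [`db`] (stubs that always return an error unless
//! the `db_connectorx` feature is enabled).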
17
18pub mod builder;
19pub mod csv;
20#[cfg(feature = "excel")]
21pub mod excel;
22#[cfg(not(feature = "excel"))]
23pub mod excel {
24    //! Excel ingestion stubs when the `excel` feature is disabled.
25    //!
26    //! This keeps the public module path stable (`rust_data_processing::ingestion::excel`)
27    //! while avoiding pulling Excel dependencies into the default build.
28
29    use std::path::Path;
30
31    use crate::error::{IngestionError, IngestionResult};
32    use crate::types::{DataSet, Schema};
33
34    fn disabled() -> IngestionError {
35        IngestionError::SchemaMismatch {
36            message: "excel ingestion is disabled; enable Cargo feature 'excel'".to_string(),
37        }
38    }
39
40    pub fn ingest_excel_from_path(
41        _path: impl AsRef<Path>,
42        _sheet_name: Option<&str>,
43        _schema: &Schema,
44    ) -> IngestionResult<DataSet> {
45        Err(disabled())
46    }
47
48    pub fn ingest_excel_workbook_from_path(
49        _path: impl AsRef<Path>,
50        _sheet_names: Option<&[&str]>,
51        _schema: &Schema,
52    ) -> IngestionResult<DataSet> {
53        Err(disabled())
54    }
55
56    pub fn infer_excel_schema_from_path(
57        _path: impl AsRef<Path>,
58        _sheet_name: Option<&str>,
59    ) -> IngestionResult<Schema> {
60        Err(disabled())
61    }
62
63    pub fn infer_excel_schema_workbook_from_path(
64        _path: impl AsRef<Path>,
65        _sheet_names: Option<&[&str]>,
66    ) -> IngestionResult<Schema> {
67        Err(disabled())
68    }
69}
70#[cfg(feature = "db_connectorx")]
71pub mod db;
72pub mod json;
73pub mod parquet;
74#[cfg(not(feature = "db_connectorx"))]
75pub mod db {
76    //! Direct DB ingestion stubs when `db_connectorx` is disabled.
77    //!
78    //! Enable with `--features db_connectorx` plus a source, e.g. `--features db_mysql`.
79
80    use crate::error::{IngestionError, IngestionResult};
81    use crate::types::{DataSet, Schema};
82
83    fn disabled() -> IngestionError {
84        IngestionError::SchemaMismatch {
85            message: "db ingestion is disabled; enable Cargo feature 'db_connectorx'".to_string(),
86        }
87    }
88
89    pub fn ingest_from_db(_conn: &str, _query: &str, _schema: &Schema) -> IngestionResult<DataSet> {
90        Err(disabled())
91    }
92
93    pub fn ingest_from_db_infer(_conn: &str, _query: &str) -> IngestionResult<DataSet> {
94        Err(disabled())
95    }
96}
97pub mod observability;
98pub(crate) mod polars_bridge;
99pub mod unified;
100
101pub use builder::IngestionOptionsBuilder;
102pub use observability::{
103    CompositeObserver, FileObserver, IngestionContext, IngestionObserver, IngestionSeverity,
104    IngestionStats, StdErrObserver,
105};
106pub use unified::{
107    ExcelSheetSelection, IngestionFormat, IngestionOptions, IngestionRequest,
108    infer_schema_from_path, ingest_from_path, ingest_from_path_infer,
109};
110
111pub use db::{ingest_from_db, ingest_from_db_infer};