Expand description
Transformation specifications and helpers.
This module defines engine-agnostic transformation specs in crate-owned types that can be
applied to an in-memory crate::types::DataSet.
Phase 1 intent:
- Keep public API free of Polars types
- Implement by compiling to the Polars-backed crate::pipeline::DataFrame where possible
- Reserve room for additional backends later
§Example
use rust_data_processing::pipeline::CastMode;
use rust_data_processing::transform::{TransformSpec, TransformStep};
use rust_data_processing::types::{DataSet, DataType, Field, Schema, Value};
let ds = DataSet::new(
Schema::new(vec![
Field::new("id", DataType::Int64),
Field::new("score", DataType::Int64),
Field::new("weather", DataType::Utf8),
]),
vec![
vec![Value::Int64(1), Value::Int64(10), Value::Utf8("drizzle".to_string())],
vec![Value::Int64(2), Value::Null, Value::Utf8("rain".to_string())],
],
);
let out_schema = Schema::new(vec![
Field::new("id", DataType::Int64),
Field::new("score_f", DataType::Float64),
Field::new("wx", DataType::Utf8),
]);
let spec = TransformSpec::new(out_schema.clone())
.with_step(TransformStep::Rename {
pairs: vec![("weather".to_string(), "wx".to_string())],
})
.with_step(TransformStep::Rename {
pairs: vec![("score".to_string(), "score_f".to_string())],
})
.with_step(TransformStep::Cast {
column: "score_f".to_string(),
to: DataType::Float64,
mode: CastMode::Lossy,
})
.with_step(TransformStep::FillNull {
column: "score_f".to_string(),
value: Value::Float64(0.0),
})
.with_step(TransformStep::Select {
columns: vec!["id".to_string(), "score_f".to_string(), "wx".to_string()],
});
let out = spec.apply(&ds)?;
assert_eq!(out.schema, out_schema);
Structs§
- TransformSpec - A user-provided transformation specification with an explicit output schema.
Enums§
- TransformStep - A transformation step in a TransformSpec.