Module transform

Source
Expand description

Transformation specifications and helpers.

This module defines engine-agnostic transformation specs, expressed in crate-owned types, that can be applied to an in-memory crate::types::DataSet.

Phase 1 intent:

  • Keep public API free of Polars types
  • Implement by compiling to the Polars-backed crate::pipeline::DataFrame where possible
  • Reserve room for additional backends later

Example

use rust_data_processing::pipeline::CastMode;
use rust_data_processing::transform::{TransformSpec, TransformStep};
use rust_data_processing::types::{DataSet, DataType, Field, Schema, Value};

let ds = DataSet::new(
    Schema::new(vec![
        Field::new("id", DataType::Int64),
        Field::new("score", DataType::Int64),
        Field::new("weather", DataType::Utf8),
    ]),
    vec![
        vec![Value::Int64(1), Value::Int64(10), Value::Utf8("drizzle".to_string())],
        vec![Value::Int64(2), Value::Null, Value::Utf8("rain".to_string())],
    ],
);

let out_schema = Schema::new(vec![
    Field::new("id", DataType::Int64),
    Field::new("score_f", DataType::Float64),
    Field::new("wx", DataType::Utf8),
]);

let spec = TransformSpec::new(out_schema.clone())
    .with_step(TransformStep::Rename {
        pairs: vec![("weather".to_string(), "wx".to_string())],
    })
    .with_step(TransformStep::Rename {
        pairs: vec![("score".to_string(), "score_f".to_string())],
    })
    .with_step(TransformStep::Cast {
        column: "score_f".to_string(),
        to: DataType::Float64,
        mode: CastMode::Lossy,
    })
    .with_step(TransformStep::FillNull {
        column: "score_f".to_string(),
        value: Value::Float64(0.0),
    })
    .with_step(TransformStep::Select {
        columns: vec!["id".to_string(), "score_f".to_string(), "wx".to_string()],
    });

let out = spec.apply(&ds)?;
assert_eq!(out.schema, out_schema);

Structs

TransformSpec
A user-provided transformation specification with an explicit output schema.

Enums

TransformStep
A transformation step in a TransformSpec.