rust_data_processing/processing/
map.rs

1//! Row mapping for [`crate::types::DataSet`].
2
3use crate::types::{DataSet, Value};
4
5/// Returns a new [`DataSet`] by applying `mapper` to every row.
6///
7/// This is a convenience wrapper around [`DataSet::map_rows`].
8///
9/// # Panics
10///
11/// Panics if `mapper` returns rows with a different length than the schema field count.
12pub fn map<F>(dataset: &DataSet, mapper: F) -> DataSet
13where
14    F: FnMut(&[Value]) -> Vec<Value>,
15{
16    dataset.map_rows(mapper)
17}
18
#[cfg(test)]
mod tests {
    use super::map;
    use crate::types::{DataSet, DataType, Field, Schema, Value};

    /// Builds a three-row fixture with the schema `(id: Int64, active: Bool, name: Utf8)`.
    fn sample_dataset() -> DataSet {
        let schema = Schema::new(vec![
            Field::new("id", DataType::Int64),
            Field::new("active", DataType::Bool),
            Field::new("name", DataType::Utf8),
        ]);

        let rows = vec![
            vec![
                Value::Int64(1),
                Value::Bool(true),
                Value::Utf8("a".to_string()),
            ],
            vec![
                Value::Int64(2),
                Value::Bool(false),
                Value::Utf8("b".to_string()),
            ],
            vec![
                Value::Int64(3),
                Value::Bool(true),
                Value::Utf8("c".to_string()),
            ],
        ];

        DataSet::new(schema, rows)
    }

    /// `map` must rewrite every cell as the closure dictates, keep the schema, and
    /// leave the input dataset untouched.
    #[test]
    fn map_rows_transforms_values_and_preserves_schema() {
        let ds = sample_dataset();
        let out = map(&ds, |row| {
            // Each column gets a type-specific transformation; unexpected variants pass through.
            let id = match &row[0] {
                Value::Int64(v) => Value::Int64(v + 10),
                other => other.clone(),
            };
            let active = match &row[1] {
                Value::Bool(v) => Value::Bool(!v),
                other => other.clone(),
            };
            let name = match &row[2] {
                Value::Utf8(s) => Value::Utf8(s.to_uppercase()),
                other => other.clone(),
            };
            vec![id, active, name]
        });

        assert_eq!(out.schema, ds.schema);
        assert_eq!(out.row_count(), 3);
        assert_eq!(
            out.rows,
            vec![
                vec![
                    Value::Int64(11),
                    Value::Bool(false),
                    Value::Utf8("A".to_string())
                ],
                vec![
                    Value::Int64(12),
                    Value::Bool(true),
                    Value::Utf8("B".to_string())
                ],
                vec![
                    Value::Int64(13),
                    Value::Bool(false),
                    Value::Utf8("C".to_string())
                ],
            ]
        );

        // Original unchanged
        assert_eq!(ds.rows[0][0], Value::Int64(1));
        assert_eq!(ds.rows[0][1], Value::Bool(true));
        assert_eq!(ds.rows[0][2], Value::Utf8("a".to_string()));
    }

    /// A mapper that yields a row of the wrong width must trigger the documented panic.
    ///
    /// The call goes through `map` (not `DataSet::map_rows` directly) so that the
    /// wrapper's own `# Panics` contract is what this module verifies.
    #[test]
    #[should_panic(expected = "mapped row length")]
    fn map_rows_panics_if_mapper_returns_wrong_arity() {
        let ds = sample_dataset();
        let _ = map(&ds, |_row| vec![Value::Int64(1)]);
    }
}