rust_data_processing/processing/
map.rs1use crate::types::{DataSet, Value};
4
5pub fn map<F>(dataset: &DataSet, mapper: F) -> DataSet
13where
14 F: FnMut(&[Value]) -> Vec<Value>,
15{
16 dataset.map_rows(mapper)
17}
18
#[cfg(test)]
mod tests {
    use super::map;
    use crate::types::{DataSet, DataType, Field, Schema, Value};

    /// Builds a single `(id, active, name)` row in the column order used by
    /// [`sample_dataset`].
    fn make_row(id: i64, active: bool, name: &str) -> Vec<Value> {
        vec![
            Value::Int64(id),
            Value::Bool(active),
            Value::Utf8(name.to_string()),
        ]
    }

    /// Three-row fixture with columns `id: Int64`, `active: Bool`, `name: Utf8`.
    fn sample_dataset() -> DataSet {
        let schema = Schema::new(vec![
            Field::new("id", DataType::Int64),
            Field::new("active", DataType::Bool),
            Field::new("name", DataType::Utf8),
        ]);

        let rows = vec![
            make_row(1, true, "a"),
            make_row(2, false, "b"),
            make_row(3, true, "c"),
        ];

        DataSet::new(schema, rows)
    }

    /// `map` rewrites every cell via the mapper, keeps the schema, and leaves
    /// the borrowed input untouched.
    #[test]
    fn map_rows_transforms_values_and_preserves_schema() {
        let ds = sample_dataset();

        // Per-column transform: id + 10, negate the flag, upper-case the name.
        // A value of an unexpected variant is passed through unchanged.
        let out = map(&ds, |cells| {
            let id = match &cells[0] {
                Value::Int64(v) => Value::Int64(v + 10),
                other => other.clone(),
            };
            let active = match &cells[1] {
                Value::Bool(v) => Value::Bool(!v),
                other => other.clone(),
            };
            let name = match &cells[2] {
                Value::Utf8(s) => Value::Utf8(s.to_uppercase()),
                other => other.clone(),
            };
            vec![id, active, name]
        });

        assert_eq!(out.schema, ds.schema);
        assert_eq!(out.row_count(), 3);
        assert_eq!(
            out.rows,
            vec![
                make_row(11, false, "A"),
                make_row(12, true, "B"),
                make_row(13, false, "C"),
            ]
        );

        // The input is only borrowed, so every original row must be intact,
        // not just the first one.
        assert_eq!(ds.rows, sample_dataset().rows);
    }

    /// A mapper that returns a row of the wrong length must panic.
    #[test]
    #[should_panic(expected = "mapped row length")]
    fn map_rows_panics_if_mapper_returns_wrong_arity() {
        let ds = sample_dataset();
        // Go through `map` (not `DataSet::map_rows` directly) so the wrapper
        // under test is what gets exercised on the panic path.
        let _ = map(&ds, |_row| vec![Value::Int64(1)]);
    }
}