rust_data_processing/
types.rs1use serde::{Deserialize, Serialize};
7
8#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
10pub enum DataType {
11 Int64,
13 Float64,
15 Bool,
17 Utf8,
19}
20
21#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
23pub struct Field {
24 pub name: String,
26 pub data_type: DataType,
28}
29
30impl Field {
31 pub fn new(name: impl Into<String>, data_type: DataType) -> Self {
33 Self {
34 name: name.into(),
35 data_type,
36 }
37 }
38}
39
40#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
42pub struct Schema {
43 pub fields: Vec<Field>,
45}
46
47impl Schema {
48 pub fn new(fields: Vec<Field>) -> Self {
50 Self { fields }
51 }
52
53 pub fn field_names(&self) -> impl Iterator<Item = &str> {
55 self.fields.iter().map(|f| f.name.as_str())
56 }
57
58 pub fn index_of(&self, name: &str) -> Option<usize> {
60 self.fields.iter().position(|f| f.name == name)
61 }
62}
63
64#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
66pub enum Value {
67 Null,
69 Int64(i64),
71 Float64(f64),
73 Bool(bool),
75 Utf8(String),
77}
78
79#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
83pub struct DataSet {
84 pub schema: Schema,
86 pub rows: Vec<Vec<Value>>,
88}
89
90impl DataSet {
91 pub fn new(schema: Schema, rows: Vec<Vec<Value>>) -> Self {
93 Self { schema, rows }
94 }
95
96 pub fn row_count(&self) -> usize {
98 self.rows.len()
99 }
100
101 pub fn filter_rows<F>(&self, mut predicate: F) -> Self
105 where
106 F: FnMut(&[Value]) -> bool,
107 {
108 let rows = self
109 .rows
110 .iter()
111 .filter(|row| predicate(row.as_slice()))
112 .cloned()
113 .collect();
114 Self {
115 schema: self.schema.clone(),
116 rows,
117 }
118 }
119
120 pub fn map_rows<F>(&self, mut mapper: F) -> Self
128 where
129 F: FnMut(&[Value]) -> Vec<Value>,
130 {
131 let expected_len = self.schema.fields.len();
132 let rows = self
133 .rows
134 .iter()
135 .map(|row| {
136 let out = mapper(row.as_slice());
137 assert!(
138 out.len() == expected_len,
139 "mapped row length {} does not match schema length {}",
140 out.len(),
141 expected_len
142 );
143 out
144 })
145 .collect();
146
147 Self {
148 schema: self.schema.clone(),
149 rows,
150 }
151 }
152
153 pub fn reduce_rows<A, F>(&self, init: A, mut reducer: F) -> A
157 where
158 F: FnMut(A, &[Value]) -> A,
159 {
160 self.rows
161 .iter()
162 .fold(init, |acc, row| reducer(acc, row.as_slice()))
163 }
164}