- serde
- alias
- Deserialize
- ReaderBuilder
Read CSV and remove (trim) whitespace from around the values
- Sometimes we have a CSV file where the data is aligned nicely using spaces to pad the values. (Most likely a manually maintained CSV file).
- We can tell the CSV reader to trim those whitespaces.
- In this example we also used the alias attribute of serde to map the real titles in the CSV file to the field names of the struct, which can only use a much more limited set of characters.
examples/csv/csv-trim-all-the-whitespaces/planets.csv
Planet name, Distance (AU), Mass Mercury, 0.4, 0.055 Venus, 0.7, 0.815 Earth, 1, 1 Mars, 1.5, 0.107 Tesla Ceres, 2.77, 0.00015 Jupiter, 5.2, 318 Saturn, 9.5, 95 Uranus, 19.6, 14 Neptune, 30, 17 Pluto, 39, 0.00218 Charon, 39, 0.000254
examples/csv/csv-trim-all-the-whitespaces/src/main.rs
#[derive(Debug, serde::Deserialize, serde::Serialize)] struct Planet { #[serde(alias = "Planet name")] name: String, #[serde(alias = "Distance (AU)")] distance: String, #[serde(alias = "Mass")] mass: String, } fn main() { let filepath = "planets.csv"; read_and_print_file(filepath); println!(); for result in read_file_return_results(filepath) { match result { Ok(planet) => println!("{planet:?}"), Err(err) => println!("Error parsing csv {err}"), } } println!(); for planet in read_file_return_planets(filepath) { println!("{planet:?}"); } } fn read_and_print_file(filepath: &str) { let csv_text = std::fs::read_to_string(filepath).expect("Error reading file"); let mut rdr = csv::ReaderBuilder::new() .has_headers(true) .trim(csv::Trim::All) .from_reader(csv_text.as_bytes()); for result in rdr.deserialize::<Planet>() { match result { Ok(record) => println!("{:?}", record), Err(err) => println!("Error parsing csv {err}"), } } } fn read_file_return_results(filepath: &str) -> Vec<Result<Planet, csv::Error>> { let csv_text = std::fs::read_to_string(filepath).expect("Error reading file"); let mut rdr = csv::ReaderBuilder::new() .has_headers(true) .trim(csv::Trim::All) .from_reader(csv_text.as_bytes()); rdr.deserialize::<Planet>().collect::<Vec<_>>() } fn read_file_return_planets(filepath: &str) -> Vec<Planet> { let csv_text = std::fs::read_to_string(filepath).expect("Error reading file"); let mut rdr = csv::ReaderBuilder::new() .has_headers(true) .trim(csv::Trim::All) .from_reader(csv_text.as_bytes()); rdr.deserialize::<Planet>() .filter_map(|entry| entry.ok()) .collect::<Vec<_>>() }
examples/csv/csv-trim-all-the-whitespaces/Cargo.toml
[package] name = "handle-csv" version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] csv = "1.2.2" serde = { version = "1.0.213", features = ["derive"] }
Planet { name: "Mercury", distance: "0.4", mass: "0.055" } Planet { name: "Venus", distance: "0.7", mass: "0.815" } Planet { name: "Earth", distance: "1", mass: "1" } Planet { name: "Mars", distance: "1.5", mass: "0.107" } Error parsing csv CSV error: record 5 (line: 6, byte: 175): found record with 1 fields, but the previous record has 3 fields Planet { name: "Ceres", distance: "2.77", mass: "0.00015" } Planet { name: "Jupiter", distance: "5.2", mass: "318" } Planet { name: "Saturn", distance: "9.5", mass: "95" } Planet { name: "Uranus", distance: "19.6", mass: "14" } Planet { name: "Neptune", distance: "30", mass: "17" } Planet { name: "Pluto", distance: "39", mass: "0.00218" } Planet { name: "Charon", distance: "39", mass: "0.000254" } Planet { name: "Mercury", distance: "0.4", mass: "0.055" } Planet { name: "Venus", distance: "0.7", mass: "0.815" } Planet { name: "Earth", distance: "1", mass: "1" } Planet { name: "Mars", distance: "1.5", mass: "0.107" } Error parsing csv CSV error: record 5 (line: 6, byte: 175): found record with 1 fields, but the previous record has 3 fields Planet { name: "Ceres", distance: "2.77", mass: "0.00015" } Planet { name: "Jupiter", distance: "5.2", mass: "318" } Planet { name: "Saturn", distance: "9.5", mass: "95" } Planet { name: "Uranus", distance: "19.6", mass: "14" } Planet { name: "Neptune", distance: "30", mass: "17" } Planet { name: "Pluto", distance: "39", mass: "0.00218" } Planet { name: "Charon", distance: "39", mass: "0.000254" } Planet { name: "Mercury", distance: "0.4", mass: "0.055" } Planet { name: "Venus", distance: "0.7", mass: "0.815" } Planet { name: "Earth", distance: "1", mass: "1" } Planet { name: "Mars", distance: "1.5", mass: "0.107" } Planet { name: "Ceres", distance: "2.77", mass: "0.00015" } Planet { name: "Jupiter", distance: "5.2", mass: "318" } Planet { name: "Saturn", distance: "9.5", mass: "95" } Planet { name: "Uranus", distance: "19.6", mass: "14" } Planet { name: "Neptune", distance: "30", mass: 
"17" } Planet { name: "Pluto", distance: "39", mass: "0.00218" } Planet { name: "Charon", distance: "39", mass: "0.000254" }