CSV
Parse in-memory CSV string into StringRecords and structs
- csv
- Reader
- from_reader
- flatten
- serde
#[derive(Debug, serde::Deserialize, serde::Serialize)] struct Fruit { fruit: String, color: String, peal: String, } fn main() { let csv_text = " fruit,color,peal apple,red,no banana,yellow,yes mango,green-yellow-red,yes bad,format "; // Read CSV data as StringRecord - disregard errors let mut rdr = csv::Reader::from_reader(csv_text.as_bytes()); for record in rdr.records().flatten() { println!("{:?}", record); } println!(); // Read CSV data as StringRecord - report errors let mut rdr = csv::Reader::from_reader(csv_text.as_bytes()); for result in rdr.records() { if let Ok(record) = result { println!("{:?}", record); } else { println!("Error parsing csv"); } } println!(); // Read CSV data as StringRecord - report errors with details let mut rdr = csv::Reader::from_reader(csv_text.as_bytes()); for result in rdr.records() { match result { Ok(record) => println!("{:?}", record), Err(err) => println!("Error parsing csv {err}"), } } println!(); // Read CSV data as Struct - disregard errors let mut rdr = csv::Reader::from_reader(csv_text.as_bytes()); for fruit in rdr.deserialize::<Fruit>().flatten() { println!("{fruit:?}"); } }
[package]
name = "parse-csv-string"
version = "0.1.0"
edition = "2021"
[dependencies]
csv = "1.3.0"
serde = { version = "1.0.213", features = ["derive"] }
StringRecord(["apple", "red", "no"])
StringRecord(["banana", "yellow", "yes"])
StringRecord(["mango", "green-yellow-red", "yes"])
StringRecord(["apple", "red", "no"])
StringRecord(["banana", "yellow", "yes"])
StringRecord(["mango", "green-yellow-red", "yes"])
Error parsing csv
StringRecord(["apple", "red", "no"])
StringRecord(["banana", "yellow", "yes"])
StringRecord(["mango", "green-yellow-red", "yes"])
Error parsing csv CSV error: record 4 (line: 6, byte: 76): found record with 2 fields, but the previous record has 3 fields
Fruit { fruit: "apple", color: "red", peal: "no" }
Fruit { fruit: "banana", color: "yellow", peal: "yes" }
Fruit { fruit: "mango", color: "green-yellow-red", peal: "yes" }
Read CSV file as a vector of StringRecords
-
StringRecord
-
We read the rows, skipping the first (header) row
-
We can iterate over the rows or access the individual elements
Planet name,Distance (AU),Mass
Mercury,0.4,0.055
Venus,0.7,0.815
Earth,1,1
Mars,1.5,0.107
Ceres,2.77,0.00015
Jupiter,5.2,318
Saturn,9.5,95
Uranus,19.6,14
Neptune,30,17
Pluto,39,0.00218
Charon,39,0.000254
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.2.2"
use std::fs::File;

/// Loads `planets.csv` and demonstrates three ways of using the rows:
/// printing whole records, iterating over the fields of one record, and
/// indexing a single field by row and column.
fn main() {
    let filepath = "planets.csv";
    let rows = read_file(filepath);

    // Every record, followed by its first field.
    for row in &rows {
        println!("{:?}", row);
        println!("{}", &row[0]);
    }

    println!("------");
    // All the fields of the first record.
    for value in &rows[0] {
        println!("{}", value);
    }

    println!("------");
    // One field addressed as rows[row][column].
    println!("{}", &rows[3][0]);
}

/// Reads every record of the CSV file at `filepath` into a vector.
///
/// # Panics
///
/// Panics if the file cannot be opened or if any record fails to parse.
fn read_file(filepath: &str) -> Vec<csv::StringRecord> {
    let file = File::open(filepath)
        .unwrap_or_else(|error| panic!("Error opening file {}: {}", filepath, error));

    let mut rdr = csv::Reader::from_reader(file);
    // Each `row` is already an owned `StringRecord`, so it is moved into the
    // vector as-is; no clone is needed.
    rdr.records()
        .map(|result| result.unwrap_or_else(|err| panic!("Error {}", err)))
        .collect()
}
StringRecord(["Mercury", "0.4", "0.055"])
Mercury
StringRecord(["Venus", "0.7", "0.815"])
Venus
StringRecord(["Earth", "1", "1"])
Earth
StringRecord(["Mars", "1.5", "0.107"])
Mars
StringRecord(["Ceres", "2.77", "0.00015"])
Ceres
StringRecord(["Jupiter", "5.2", "318"])
Jupiter
StringRecord(["Saturn", "9.5", "95"])
Saturn
StringRecord(["Uranus", "19.6", "14"])
Uranus
StringRecord(["Neptune", "30", "17"])
Neptune
StringRecord(["Pluto", "39", "0.00218"])
Pluto
StringRecord(["Charon", "39", "0.000254"])
Charon
------
Mercury
0.4
0.055
------
Mars
Read CSV file into hashes
- HashMap
Planet name,Distance (AU),Mass
Mercury,0.4,0.055
Venus,0.7,0.815
Earth,1,1
Mars,1.5,0.107
Ceres,2.77,0.00015
Jupiter,5.2,318
Saturn,9.5,95
Uranus,19.6,14
Neptune,30,17
Pluto,39,0.00218
Charon,39,0.000254
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.2.2"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.97"
use std::collections::HashMap; use std::error::Error; use std::fs::File; type Record = HashMap<String, String>; fn main() { let filepath = "planets.csv"; let result = read_file(filepath); match result { Ok(rows) => { for row in &rows { println!("{:?}", row); println!("{}", row["Planet name"]); } println!("---"); println!("{}", rows[3]["Planet name"]); } Err(err) => panic!("Error: {}", err), } } fn read_file(filepath: &str) -> Result<Vec<Record>, Box<dyn Error>> { let mut records: Vec<Record> = vec![]; match File::open(filepath) { Ok(file) => { let mut rdr = csv::Reader::from_reader(file); for result in rdr.deserialize() { let record: Record = result?; records.push(record); } } Err(error) => panic!("Error opening file {}: {}", filepath, error), } Ok(records) }
{"Mass": "0.055", "Planet name": "Mercury", "Distance (AU)": "0.4"}
Mercury
{"Distance (AU)": "0.7", "Planet name": "Venus", "Mass": "0.815"}
Venus
{"Planet name": "Earth", "Distance (AU)": "1", "Mass": "1"}
Earth
{"Mass": "0.107", "Distance (AU)": "1.5", "Planet name": "Mars"}
Mars
{"Planet name": "Ceres", "Distance (AU)": "2.77", "Mass": "0.00015"}
Ceres
{"Distance (AU)": "5.2", "Mass": "318", "Planet name": "Jupiter"}
Jupiter
{"Distance (AU)": "9.5", "Planet name": "Saturn", "Mass": "95"}
Saturn
{"Planet name": "Uranus", "Mass": "14", "Distance (AU)": "19.6"}
Uranus
{"Planet name": "Neptune", "Mass": "17", "Distance (AU)": "30"}
Neptune
{"Distance (AU)": "39", "Mass": "0.00218", "Planet name": "Pluto"}
Pluto
{"Planet name": "Charon", "Mass": "0.000254", "Distance (AU)": "39"}
Charon
---
Mars
Read CSV file as structs
- struct
Planet name,Distance (AU),Mass
Mercury,0.4,0.055
Venus,0.7,0.815
Earth,1,1
Mars,1.5,0.107
Ceres,2.77,0.00015
Jupiter,5.2,318
Saturn,9.5,95
Uranus,19.6,14
Neptune,30,17
Pluto,39,0.00218
Charon,39,0.000254
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.2.2"
serde = { version = "1.0", features = ["derive"] }
use std::error::Error; use std::fs::File; #[derive(Debug, serde::Deserialize)] struct Record { #[serde(rename = "Planet name")] name: String, #[serde(rename = "Distance (AU)")] distance: f32, #[serde(rename = "Mass")] mass: f32, } fn main() { let filepath = "planets.csv"; let result = read_file(filepath); match result { Ok(rows) => { for row in &rows { println!("{:?}", row); } println!("---"); println!("{}", rows[3].name); println!("{}", rows[3].distance); println!("{}", rows[3].mass); } Err(err) => panic!("Error: {}", err), } } fn read_file(filepath: &str) -> Result<Vec<Record>, Box<dyn Error>> { let mut records: Vec<Record> = vec![]; match File::open(filepath) { Ok(file) => { let mut rdr = csv::Reader::from_reader(file); for result in rdr.deserialize() { let record: Record = result?; records.push(record); } } Err(error) => panic!("Error opening file {}: {}", filepath, error), } Ok(records) }
Record { name: "Mercury", distance: 0.4, mass: 0.055 }
Record { name: "Venus", distance: 0.7, mass: 0.815 }
Record { name: "Earth", distance: 1.0, mass: 1.0 }
Record { name: "Mars", distance: 1.5, mass: 0.107 }
Record { name: "Ceres", distance: 2.77, mass: 0.00015 }
Record { name: "Jupiter", distance: 5.2, mass: 318.0 }
Record { name: "Saturn", distance: 9.5, mass: 95.0 }
Record { name: "Uranus", distance: 19.6, mass: 14.0 }
Record { name: "Neptune", distance: 30.0, mass: 17.0 }
Record { name: "Pluto", distance: 39.0, mass: 0.00218 }
Record { name: "Charon", distance: 39.0, mass: 0.000254 }
---
Mars
1.5
0.107
Read CSV to struct, add extra fields
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.2.2"
serde = { version = "1.0", features = ["derive"] }
Record { name: "Mercury", distance: 0.4, mass: 0.055, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Venus", distance: 0.7, mass: 0.815, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Earth", distance: 1.0, mass: 1.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Mars", distance: 1.5, mass: 0.107, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Ceres", distance: 2.77, mass: 0.00015, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Jupiter", distance: 5.2, mass: 318.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Saturn", distance: 9.5, mass: 95.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Uranus", distance: 19.6, mass: 14.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Neptune", distance: 30.0, mass: 17.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Pluto", distance: 39.0, mass: 0.00218, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Charon", distance: 39.0, mass: 0.000254, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
---
Mars
1.5
0.107
abc
0
0
Planet name,Distance (AU),Mass
Mercury,0.4,0.055
Venus,0.7,0.815
Earth,1,1
Mars,1.5,0.107
Ceres,2.77,0.00015
Jupiter,5.2,318
Saturn,9.5,95
Uranus,19.6,14
Neptune,30,17
Pluto,39,0.00218
Charon,39,0.000254
use std::error::Error; use std::fs::File; #[derive(Debug, serde::Deserialize)] struct Person { name: String, birth_year: u16, } #[derive(Debug, serde::Deserialize)] struct Record { #[serde(rename = "Planet name")] name: String, #[serde(rename = "Distance (AU)")] distance: f32, #[serde(rename = "Mass")] mass: f32, #[serde(default = "get_text")] text: String, #[serde(default = "get_zero")] float: f32, #[serde(default = "get_person")] person: Person, } fn get_text() -> String { String::from("abc") } fn get_zero() -> f32 { 0.0 } fn get_person() -> Person { Person { name: String::new(), birth_year: 0, } } fn main() { let filepath = "planets.csv"; let result = read_file(filepath); match result { Ok(rows) => { for row in &rows { println!("{:?}", row); } println!("---"); println!("{}", rows[3].name); println!("{}", rows[3].distance); println!("{}", rows[3].mass); println!("{}", rows[3].text); println!("{}", rows[3].float); println!("{}", rows[3].person.name); println!("{}", rows[3].person.birth_year); } Err(err) => panic!("Error: {}", err), } } fn read_file(filepath: &str) -> Result<Vec<Record>, Box<dyn Error>> { let mut records: Vec<Record> = vec![]; match File::open(filepath) { Ok(file) => { let mut rdr = csv::Reader::from_reader(file); for result in rdr.deserialize() { let record: Record = result?; records.push(record); } } Err(error) => panic!("Error opening file {}: {}", filepath, error), } Ok(records) }
Read CSV and remove (trim) whitespace from around the values
-
serde
-
alias
-
Deserialize
-
ReaderBuilder
-
Sometimes we have a CSV file where the data is aligned nicely using spaces to pad the values. (Most likely a manually maintained CSV file).
-
We can tell the CSV reader to trim those whitespaces.
-
In this example we also used the
alias
attribute of serde
to map the real titles in the CSV file to the field names of the struct, which can only use a much more limited set of characters.
Planet name, Distance (AU), Mass
Mercury, 0.4, 0.055
Venus, 0.7, 0.815
Earth, 1, 1
Mars, 1.5, 0.107
Tesla
Ceres, 2.77, 0.00015
Jupiter, 5.2, 318
Saturn, 9.5, 95
Uranus, 19.6, 14
Neptune, 30, 17
Pluto, 39, 0.00218
Charon, 39, 0.000254
#[derive(Debug, serde::Deserialize, serde::Serialize)] struct Planet { #[serde(alias = "Planet name")] name: String, #[serde(alias = "Distance (AU)")] distance: String, #[serde(alias = "Mass")] mass: String, } fn main() { let filepath = "planets.csv"; read_and_print_file(filepath); println!(); for result in read_file_return_results(filepath) { match result { Ok(planet) => println!("{planet:?}"), Err(err) => println!("Error parsing csv {err}"), } } println!(); for planet in read_file_return_planets(filepath) { println!("{planet:?}"); } } fn read_and_print_file(filepath: &str) { let csv_text = std::fs::read_to_string(filepath).expect("Error reading file"); let mut rdr = csv::ReaderBuilder::new() .has_headers(true) .trim(csv::Trim::All) .from_reader(csv_text.as_bytes()); for result in rdr.deserialize::<Planet>() { match result { Ok(record) => println!("{:?}", record), Err(err) => println!("Error parsing csv {err}"), } } } fn read_file_return_results(filepath: &str) -> Vec<Result<Planet, csv::Error>> { let csv_text = std::fs::read_to_string(filepath).expect("Error reading file"); let mut rdr = csv::ReaderBuilder::new() .has_headers(true) .trim(csv::Trim::All) .from_reader(csv_text.as_bytes()); rdr.deserialize::<Planet>().collect::<Vec<_>>() } fn read_file_return_planets(filepath: &str) -> Vec<Planet> { let csv_text = std::fs::read_to_string(filepath).expect("Error reading file"); let mut rdr = csv::ReaderBuilder::new() .has_headers(true) .trim(csv::Trim::All) .from_reader(csv_text.as_bytes()); rdr.deserialize::<Planet>() .filter_map(|entry| entry.ok()) .collect::<Vec<_>>() }
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.2.2"
serde = { version = "1.0.213", features = ["derive"] }
Planet { name: "Mercury", distance: "0.4", mass: "0.055" }
Planet { name: "Venus", distance: "0.7", mass: "0.815" }
Planet { name: "Earth", distance: "1", mass: "1" }
Planet { name: "Mars", distance: "1.5", mass: "0.107" }
Error parsing csv CSV error: record 5 (line: 6, byte: 175): found record with 1 fields, but the previous record has 3 fields
Planet { name: "Ceres", distance: "2.77", mass: "0.00015" }
Planet { name: "Jupiter", distance: "5.2", mass: "318" }
Planet { name: "Saturn", distance: "9.5", mass: "95" }
Planet { name: "Uranus", distance: "19.6", mass: "14" }
Planet { name: "Neptune", distance: "30", mass: "17" }
Planet { name: "Pluto", distance: "39", mass: "0.00218" }
Planet { name: "Charon", distance: "39", mass: "0.000254" }
Planet { name: "Mercury", distance: "0.4", mass: "0.055" }
Planet { name: "Venus", distance: "0.7", mass: "0.815" }
Planet { name: "Earth", distance: "1", mass: "1" }
Planet { name: "Mars", distance: "1.5", mass: "0.107" }
Error parsing csv CSV error: record 5 (line: 6, byte: 175): found record with 1 fields, but the previous record has 3 fields
Planet { name: "Ceres", distance: "2.77", mass: "0.00015" }
Planet { name: "Jupiter", distance: "5.2", mass: "318" }
Planet { name: "Saturn", distance: "9.5", mass: "95" }
Planet { name: "Uranus", distance: "19.6", mass: "14" }
Planet { name: "Neptune", distance: "30", mass: "17" }
Planet { name: "Pluto", distance: "39", mass: "0.00218" }
Planet { name: "Charon", distance: "39", mass: "0.000254" }
Planet { name: "Mercury", distance: "0.4", mass: "0.055" }
Planet { name: "Venus", distance: "0.7", mass: "0.815" }
Planet { name: "Earth", distance: "1", mass: "1" }
Planet { name: "Mars", distance: "1.5", mass: "0.107" }
Planet { name: "Ceres", distance: "2.77", mass: "0.00015" }
Planet { name: "Jupiter", distance: "5.2", mass: "318" }
Planet { name: "Saturn", distance: "9.5", mass: "95" }
Planet { name: "Uranus", distance: "19.6", mass: "14" }
Planet { name: "Neptune", distance: "30", mass: "17" }
Planet { name: "Pluto", distance: "39", mass: "0.00218" }
Planet { name: "Charon", distance: "39", mass: "0.000254" }