CSV
Parse in-memory CSV string into StringRecords and structs
- csv
- Reader
- from_reader
- flatten
- serde
/// One row of the fruit CSV; each field is filled from the header column of the same name.
#[derive(Debug, serde::Deserialize, serde::Serialize)]
struct Fruit {
/// Text of the `fruit` column.
fruit: String,
/// Text of the `color` column.
color: String,
/// Text of the `peal` column, spelled exactly as in the CSV header.
peal: String,
}
/// Parses the same in-memory CSV text four times to contrast error-handling styles:
/// dropping bad rows, reporting them generically, reporting them with details,
/// and deserializing rows into `Fruit` structs while dropping bad rows.
fn main() {
    // The last line has only two fields, so it fails to parse as a record.
    let csv_text = "
fruit,color,peal
apple,red,no
banana,yellow,yes
mango,green-yellow-red,yes
bad,format
";

    // StringRecords: `flatten` keeps the `Ok` rows and silently drops the errors.
    let mut reader = csv::Reader::from_reader(csv_text.as_bytes());
    reader
        .records()
        .flatten()
        .for_each(|record| println!("{:?}", record));
    println!();

    // StringRecords: report that a row failed, without saying why.
    let mut reader = csv::Reader::from_reader(csv_text.as_bytes());
    for outcome in reader.records() {
        match outcome {
            Ok(record) => println!("{:?}", record),
            Err(_) => println!("Error parsing csv"),
        }
    }
    println!();

    // StringRecords: report the failure together with the error's own description.
    let mut reader = csv::Reader::from_reader(csv_text.as_bytes());
    for outcome in reader.records() {
        let line = match outcome {
            Ok(record) => format!("{:?}", record),
            Err(err) => format!("Error parsing csv {err}"),
        };
        println!("{line}");
    }
    println!();

    // Structs: deserialize each row into a `Fruit`, dropping rows that fail.
    let mut reader = csv::Reader::from_reader(csv_text.as_bytes());
    let fruits = reader.deserialize::<Fruit>().flatten();
    for fruit in fruits {
        println!("{:?}", fruit);
    }
}
[package]
name = "parse-csv-string"
version = "0.1.0"
edition = "2024"
[dependencies]
csv = "1.3.0"
serde = { version = "1.0.213", features = ["derive"] }
StringRecord(["apple", "red", "no"])
StringRecord(["banana", "yellow", "yes"])
StringRecord(["mango", "green-yellow-red", "yes"])
StringRecord(["apple", "red", "no"])
StringRecord(["banana", "yellow", "yes"])
StringRecord(["mango", "green-yellow-red", "yes"])
Error parsing csv
StringRecord(["apple", "red", "no"])
StringRecord(["banana", "yellow", "yes"])
StringRecord(["mango", "green-yellow-red", "yes"])
Error parsing csv CSV error: record 4 (line: 6, byte: 76): found record with 2 fields, but the previous record has 3 fields
Fruit { fruit: "apple", color: "red", peal: "no" }
Fruit { fruit: "banana", color: "yellow", peal: "yes" }
Fruit { fruit: "mango", color: "green-yellow-red", peal: "yes" }
Read CSV file as a vector of StringRecords
-
StringRecord
-
We read the rows (the first row is treated as the header and skipped)
-
We can iterate over the rows or access the individual elements
Planet name,Distance (AU),Mass
Mercury,0.4,0.055
Venus,0.7,0.815
Earth,1,1
Mars,1.5,0.107
Ceres,2.77,0.00015
Jupiter,5.2,318
Saturn,9.5,95
Uranus,19.6,14
Neptune,30,17
Pluto,39,0.00218
Charon,39,0.000254
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2024"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.2.2"
use std::fs::File;
/// Loads `planets.csv` and shows three ways of using the rows: printing whole
/// records, iterating over the fields of one record, and indexing a single field.
fn main() {
    let rows = read_file("planets.csv");

    // Every record, followed by its first field.
    for row in rows.iter() {
        println!("{row:?}");
        println!("{}", &row[0]);
    }

    println!("------");
    // Each field of the first record.
    for value in rows[0].iter() {
        println!("{value}");
    }

    println!("------");
    // First field of the fourth record; indexing panics if the file is shorter.
    println!("{}", &rows[3][0]);
}
/// Reads every data record of the CSV file at `filepath` into a vector.
///
/// The reader is created with `csv::Reader::from_reader`, so the first row is
/// consumed as the header and is not part of the returned records.
///
/// # Panics
///
/// Panics if the file cannot be opened, or on the first record that fails to parse.
fn read_file(filepath: &str) -> Vec<csv::StringRecord> {
    let file = match File::open(filepath) {
        Ok(file) => file,
        Err(error) => panic!("Error opening file {}: {}", filepath, error),
    };

    let mut rdr = csv::Reader::from_reader(file);
    rdr.records()
        .map(|result| match result {
            // The iterator yields owned records, so they are moved into the
            // vector directly instead of being cloned first.
            Ok(row) => row,
            Err(err) => panic!("Error {}", err),
        })
        .collect()
}
StringRecord(["Mercury", "0.4", "0.055"])
Mercury
StringRecord(["Venus", "0.7", "0.815"])
Venus
StringRecord(["Earth", "1", "1"])
Earth
StringRecord(["Mars", "1.5", "0.107"])
Mars
StringRecord(["Ceres", "2.77", "0.00015"])
Ceres
StringRecord(["Jupiter", "5.2", "318"])
Jupiter
StringRecord(["Saturn", "9.5", "95"])
Saturn
StringRecord(["Uranus", "19.6", "14"])
Uranus
StringRecord(["Neptune", "30", "17"])
Neptune
StringRecord(["Pluto", "39", "0.00218"])
Pluto
StringRecord(["Charon", "39", "0.000254"])
Charon
------
Mercury
0.4
0.055
------
Mars
Read CSV file into hashes
- HashMap
Planet name,Distance (AU),Mass
Mercury,0.4,0.055
Venus,0.7,0.815
Earth,1,1
Mars,1.5,0.107
Ceres,2.77,0.00015
Jupiter,5.2,318
Saturn,9.5,95
Uranus,19.6,14
Neptune,30,17
Pluto,39,0.00218
Charon,39,0.000254
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2024"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.2.2"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.97"
use std::collections::HashMap;
use std::error::Error;
use std::fs::File;
/// One CSV row as a map from column header to the raw text of that field.
type Record = HashMap<String, String>;
/// Loads `planets.csv` as a list of header-to-value maps, prints every row with
/// its planet name, then prints the planet name of the fourth row.
fn main() {
    // Any error reported by `read_file` aborts the program.
    let rows = match read_file("planets.csv") {
        Ok(rows) => rows,
        Err(err) => panic!("Error: {}", err),
    };

    for row in rows.iter() {
        println!("{row:?}");
        println!("{}", row["Planet name"]);
    }

    println!("---");
    println!("{}", rows[3]["Planet name"]);
}
fn read_file(filepath: &str) -> Result<Vec<Record>, Box<dyn Error>> {
let mut records: Vec<Record> = vec![];
match File::open(filepath) {
Ok(file) => {
let mut rdr = csv::Reader::from_reader(file);
for result in rdr.deserialize() {
let record: Record = result?;
records.push(record);
}
}
Err(error) => panic!("Error opening file {}: {}", filepath, error),
}
Ok(records)
}
{"Mass": "0.055", "Planet name": "Mercury", "Distance (AU)": "0.4"}
Mercury
{"Distance (AU)": "0.7", "Planet name": "Venus", "Mass": "0.815"}
Venus
{"Planet name": "Earth", "Distance (AU)": "1", "Mass": "1"}
Earth
{"Mass": "0.107", "Distance (AU)": "1.5", "Planet name": "Mars"}
Mars
{"Planet name": "Ceres", "Distance (AU)": "2.77", "Mass": "0.00015"}
Ceres
{"Distance (AU)": "5.2", "Mass": "318", "Planet name": "Jupiter"}
Jupiter
{"Distance (AU)": "9.5", "Planet name": "Saturn", "Mass": "95"}
Saturn
{"Planet name": "Uranus", "Mass": "14", "Distance (AU)": "19.6"}
Uranus
{"Planet name": "Neptune", "Mass": "17", "Distance (AU)": "30"}
Neptune
{"Distance (AU)": "39", "Mass": "0.00218", "Planet name": "Pluto"}
Pluto
{"Planet name": "Charon", "Mass": "0.000254", "Distance (AU)": "39"}
Charon
---
Mars
Read CSV file as structs
- struct
Planet name,Distance (AU),Mass
Mercury,0.4,0.055
Venus,0.7,0.815
Earth,1,1
Mars,1.5,0.107
Ceres,2.77,0.00015
Jupiter,5.2,318
Saturn,9.5,95
Uranus,19.6,14
Neptune,30,17
Pluto,39,0.00218
Charon,39,0.000254
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2024"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.2.2"
serde = { version = "1.0", features = ["derive"] }
use std::error::Error;
use std::fs::File;
/// One planet row; the `rename` attributes map the CSV header titles to the field names.
#[derive(Debug, serde::Deserialize)]
struct Record {
/// Read from the `Planet name` column.
#[serde(rename = "Planet name")]
name: String,
/// Read from the `Distance (AU)` column and parsed as a float.
#[serde(rename = "Distance (AU)")]
distance: f32,
/// Read from the `Mass` column and parsed as a float.
#[serde(rename = "Mass")]
mass: f32,
}
/// Loads `planets.csv` as typed records, prints them all, then prints each
/// field of the fourth record on its own line.
fn main() {
    // Any error reported by `read_file` aborts the program.
    let rows = match read_file("planets.csv") {
        Ok(rows) => rows,
        Err(err) => panic!("Error: {}", err),
    };

    rows.iter().for_each(|row| println!("{row:?}"));

    println!("---");
    let fourth = &rows[3];
    println!("{}", fourth.name);
    println!("{}", fourth.distance);
    println!("{}", fourth.mass);
}
fn read_file(filepath: &str) -> Result<Vec<Record>, Box<dyn Error>> {
let mut records: Vec<Record> = vec![];
match File::open(filepath) {
Ok(file) => {
let mut rdr = csv::Reader::from_reader(file);
for result in rdr.deserialize() {
let record: Record = result?;
records.push(record);
}
}
Err(error) => panic!("Error opening file {}: {}", filepath, error),
}
Ok(records)
}
Record { name: "Mercury", distance: 0.4, mass: 0.055 }
Record { name: "Venus", distance: 0.7, mass: 0.815 }
Record { name: "Earth", distance: 1.0, mass: 1.0 }
Record { name: "Mars", distance: 1.5, mass: 0.107 }
Record { name: "Ceres", distance: 2.77, mass: 0.00015 }
Record { name: "Jupiter", distance: 5.2, mass: 318.0 }
Record { name: "Saturn", distance: 9.5, mass: 95.0 }
Record { name: "Uranus", distance: 19.6, mass: 14.0 }
Record { name: "Neptune", distance: 30.0, mass: 17.0 }
Record { name: "Pluto", distance: 39.0, mass: 0.00218 }
Record { name: "Charon", distance: 39.0, mass: 0.000254 }
---
Mars
1.5
0.107
Read CSV to struct, add extra fields
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2024"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.2.2"
serde = { version = "1.0", features = ["derive"] }
Record { name: "Mercury", distance: 0.4, mass: 0.055, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Venus", distance: 0.7, mass: 0.815, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Earth", distance: 1.0, mass: 1.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Mars", distance: 1.5, mass: 0.107, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Ceres", distance: 2.77, mass: 0.00015, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Jupiter", distance: 5.2, mass: 318.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Saturn", distance: 9.5, mass: 95.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Uranus", distance: 19.6, mass: 14.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Neptune", distance: 30.0, mass: 17.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Pluto", distance: 39.0, mass: 0.00218, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Charon", distance: 39.0, mass: 0.000254, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
---
Mars
1.5
0.107
abc
0
0
Planet name,Distance (AU),Mass
Mercury,0.4,0.055
Venus,0.7,0.815
Earth,1,1
Mars,1.5,0.107
Ceres,2.77,0.00015
Jupiter,5.2,318
Saturn,9.5,95
Uranus,19.6,14
Neptune,30,17
Pluto,39,0.00218
Charon,39,0.000254
use std::error::Error;
use std::fs::File;
/// Extra nested data carried by `Record`; `planets.csv` has no column for it,
/// so in this example it is produced by the `get_person` default.
#[derive(Debug, serde::Deserialize)]
struct Person {
/// Name of the person.
name: String,
/// Year of birth.
birth_year: u16,
}
/// One planet row plus extra fields that do not exist in the CSV file and are
/// filled from the functions named in their `default` attributes.
#[derive(Debug, serde::Deserialize)]
struct Record {
/// Read from the `Planet name` column.
#[serde(rename = "Planet name")]
name: String,
/// Read from the `Distance (AU)` column and parsed as a float.
#[serde(rename = "Distance (AU)")]
distance: f32,
/// Read from the `Mass` column and parsed as a float.
#[serde(rename = "Mass")]
mass: f32,
/// Not in the CSV; falls back to the value returned by `get_text`.
#[serde(default = "get_text")]
text: String,
/// Not in the CSV; falls back to the value returned by `get_zero`.
#[serde(default = "get_zero")]
float: f32,
/// Not in the CSV; falls back to the value returned by `get_person`.
#[serde(default = "get_person")]
person: Person,
}
/// Default for `Record::text`: always the string `"abc"`.
fn get_text() -> String {
    "abc".to_owned()
}
/// Default for `Record::float`: always zero.
fn get_zero() -> f32 {
    0_f32
}
/// Default for `Record::person`: an empty name and a birth year of zero.
fn get_person() -> Person {
    let name = String::new();
    let birth_year = 0;
    Person { name, birth_year }
}
/// Loads `planets.csv` as typed records, prints them all, then prints every
/// field of the fourth record, including the ones filled in by defaults.
fn main() {
    // Any error reported by `read_file` aborts the program.
    let rows = match read_file("planets.csv") {
        Ok(rows) => rows,
        Err(err) => panic!("Error: {}", err),
    };

    rows.iter().for_each(|row| println!("{row:?}"));

    println!("---");
    let fourth = &rows[3];
    // Values that come from the CSV columns.
    println!("{}", fourth.name);
    println!("{}", fourth.distance);
    println!("{}", fourth.mass);
    // Values that come from the serde defaults.
    println!("{}", fourth.text);
    println!("{}", fourth.float);
    println!("{}", fourth.person.name);
    println!("{}", fourth.person.birth_year);
}
fn read_file(filepath: &str) -> Result<Vec<Record>, Box<dyn Error>> {
let mut records: Vec<Record> = vec![];
match File::open(filepath) {
Ok(file) => {
let mut rdr = csv::Reader::from_reader(file);
for result in rdr.deserialize() {
let record: Record = result?;
records.push(record);
}
}
Err(error) => panic!("Error opening file {}: {}", filepath, error),
}
Ok(records)
}
Read CSV remove (trim) whitespaces from around the values
-
serde
-
alias
-
Deserialize
-
ReaderBuilder
-
Sometimes we have a CSV file where the data is aligned nicely using spaces to pad the values. (Most likely a manually maintained CSV file).
-
We can tell the CSV reader to trim those whitespaces.
-
In this example we also used the
alias attribute of serde to map the real titles in the CSV file to the field names of the struct, which can use a much more limited set of characters.
Planet name, Distance (AU), Mass
Mercury, 0.4, 0.055
Venus, 0.7, 0.815
Earth, 1, 1
Mars, 1.5, 0.107
Tesla
Ceres, 2.77, 0.00015
Jupiter, 5.2, 318
Saturn, 9.5, 95
Uranus, 19.6, 14
Neptune, 30, 17
Pluto, 39, 0.00218
Charon, 39, 0.000254
/// One planet row with every value kept as text; the `alias` attributes let the
/// CSV header titles be used in place of the field names when deserializing.
#[derive(Debug, serde::Deserialize, serde::Serialize)]
struct Planet {
/// Read from the `Planet name` column.
#[serde(alias = "Planet name")]
name: String,
/// Read from the `Distance (AU)` column, kept as a string.
#[serde(alias = "Distance (AU)")]
distance: String,
/// Read from the `Mass` column, kept as a string.
#[serde(alias = "Mass")]
mass: String,
}
/// Reads `planets.csv` three times, once with each helper: printing while
/// reading, returning per-row results, and returning only the rows that parsed.
fn main() {
    let filepath = "planets.csv";

    read_and_print_file(filepath);
    println!();

    // Per-row results: good rows and parse errors are both reported.
    read_file_return_results(filepath)
        .into_iter()
        .for_each(|result| match result {
            Ok(planet) => println!("{planet:?}"),
            Err(err) => println!("Error parsing csv {err}"),
        });
    println!();

    // Only the rows that parsed successfully.
    let planets = read_file_return_planets(filepath);
    for planet in planets.iter() {
        println!("{:?}", planet);
    }
}
/// Reads the CSV file at `filepath` with whitespace trimming enabled and prints
/// every row: the parsed `Planet` on success, or the parse error otherwise.
///
/// # Panics
///
/// Panics if the file cannot be read.
fn read_and_print_file(filepath: &str) {
    let contents = std::fs::read_to_string(filepath).expect("Error reading file");

    // Configure the builder first, then create the reader from it.
    let mut builder = csv::ReaderBuilder::new();
    builder.has_headers(true).trim(csv::Trim::All);
    let mut reader = builder.from_reader(contents.as_bytes());

    for outcome in reader.deserialize::<Planet>() {
        let record = match outcome {
            Ok(record) => record,
            Err(err) => {
                println!("Error parsing csv {err}");
                continue;
            }
        };
        println!("{:?}", record);
    }
}
/// Reads the CSV file at `filepath` with whitespace trimming enabled and returns
/// one result per row, so the caller decides what to do with rows that failed.
///
/// # Panics
///
/// Panics if the file cannot be read.
fn read_file_return_results(filepath: &str) -> Vec<Result<Planet, csv::Error>> {
    let contents = std::fs::read_to_string(filepath).expect("Error reading file");

    let mut builder = csv::ReaderBuilder::new();
    builder.has_headers(true).trim(csv::Trim::All);
    let mut reader = builder.from_reader(contents.as_bytes());

    // The element type is fixed by the return type of the function.
    reader.deserialize().collect()
}
/// Reads the CSV file at `filepath` with whitespace trimming enabled and returns
/// only the rows that deserialized successfully; failing rows are dropped silently.
///
/// # Panics
///
/// Panics if the file cannot be read.
fn read_file_return_planets(filepath: &str) -> Vec<Planet> {
    let contents = std::fs::read_to_string(filepath).expect("Error reading file");

    let mut builder = csv::ReaderBuilder::new();
    builder.has_headers(true).trim(csv::Trim::All);
    let mut reader = builder.from_reader(contents.as_bytes());

    // Flattening an iterator of `Result`s keeps the `Ok` values and skips the errors.
    reader.deserialize::<Planet>().flatten().collect()
}
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2024"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.2.2"
serde = { version = "1.0.213", features = ["derive"] }
Planet { name: "Mercury", distance: "0.4", mass: "0.055" }
Planet { name: "Venus", distance: "0.7", mass: "0.815" }
Planet { name: "Earth", distance: "1", mass: "1" }
Planet { name: "Mars", distance: "1.5", mass: "0.107" }
Error parsing csv CSV error: record 5 (line: 6, byte: 175): found record with 1 fields, but the previous record has 3 fields
Planet { name: "Ceres", distance: "2.77", mass: "0.00015" }
Planet { name: "Jupiter", distance: "5.2", mass: "318" }
Planet { name: "Saturn", distance: "9.5", mass: "95" }
Planet { name: "Uranus", distance: "19.6", mass: "14" }
Planet { name: "Neptune", distance: "30", mass: "17" }
Planet { name: "Pluto", distance: "39", mass: "0.00218" }
Planet { name: "Charon", distance: "39", mass: "0.000254" }
Planet { name: "Mercury", distance: "0.4", mass: "0.055" }
Planet { name: "Venus", distance: "0.7", mass: "0.815" }
Planet { name: "Earth", distance: "1", mass: "1" }
Planet { name: "Mars", distance: "1.5", mass: "0.107" }
Error parsing csv CSV error: record 5 (line: 6, byte: 175): found record with 1 fields, but the previous record has 3 fields
Planet { name: "Ceres", distance: "2.77", mass: "0.00015" }
Planet { name: "Jupiter", distance: "5.2", mass: "318" }
Planet { name: "Saturn", distance: "9.5", mass: "95" }
Planet { name: "Uranus", distance: "19.6", mass: "14" }
Planet { name: "Neptune", distance: "30", mass: "17" }
Planet { name: "Pluto", distance: "39", mass: "0.00218" }
Planet { name: "Charon", distance: "39", mass: "0.000254" }
Planet { name: "Mercury", distance: "0.4", mass: "0.055" }
Planet { name: "Venus", distance: "0.7", mass: "0.815" }
Planet { name: "Earth", distance: "1", mass: "1" }
Planet { name: "Mars", distance: "1.5", mass: "0.107" }
Planet { name: "Ceres", distance: "2.77", mass: "0.00015" }
Planet { name: "Jupiter", distance: "5.2", mass: "318" }
Planet { name: "Saturn", distance: "9.5", mass: "95" }
Planet { name: "Uranus", distance: "19.6", mass: "14" }
Planet { name: "Neptune", distance: "30", mass: "17" }
Planet { name: "Pluto", distance: "39", mass: "0.00218" }
Planet { name: "Charon", distance: "39", mass: "0.000254" }