Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

CSV

Parse in-memory CSV string into StringRecords and structs

  • csv
  • Reader
  • from_reader
  • flatten
  • serde
/// One data row of the sample CSV, deserialized by matching the header
/// names (`fruit`, `color`, `peal`) against the field names below.
#[derive(Debug, serde::Deserialize, serde::Serialize)]
struct Fruit {
    /// Value of the `fruit` column.
    fruit: String,
    /// Value of the `color` column.
    color: String,
    /// Value of the `peal` column.
    peal: String,
}

/// Parses the same in-memory CSV text four times to contrast different ways
/// of dealing with rows that fail to parse:
///
/// 1. `StringRecord`s, silently dropping bad rows,
/// 2. `StringRecord`s, printing a generic message for bad rows,
/// 3. `StringRecord`s, printing the detailed parser error for bad rows,
/// 4. `Fruit` structs, silently dropping bad rows.
///
/// The last line of the sample (`bad,format`) has only two fields and is the
/// row that triggers the error paths.
fn main() {
    let csv_text = "
fruit,color,peal
apple,red,no
banana,yellow,yes
mango,green-yellow-red,yes

bad,format
";
    // A byte slice is `Copy` and implements `Read`, so every pass below can
    // start a fresh reader from the same input.
    let input = csv_text.as_bytes();

    // Pass 1: StringRecord rows - `flatten` keeps only the `Ok` values.
    let mut reader = csv::Reader::from_reader(input);
    reader
        .records()
        .flatten()
        .for_each(|row| println!("{row:?}"));
    println!();

    // Pass 2: StringRecord rows - report that an error happened, without details.
    let mut reader = csv::Reader::from_reader(input);
    for outcome in reader.records() {
        match outcome {
            Ok(row) => println!("{row:?}"),
            Err(_) => println!("Error parsing csv"),
        }
    }
    println!();

    // Pass 3: StringRecord rows - report the error including the parser's message.
    let mut reader = csv::Reader::from_reader(input);
    reader.records().for_each(|outcome| match outcome {
        Ok(row) => println!("{row:?}"),
        Err(problem) => println!("Error parsing csv {problem}"),
    });
    println!();

    // Pass 4: rows deserialized into `Fruit` - bad rows are dropped again.
    let mut reader = csv::Reader::from_reader(input);
    let fruits = reader.deserialize::<Fruit>().flatten();
    for item in fruits {
        println!("{item:?}");
    }
}
[package]
name = "parse-csv-string"
version = "0.1.0"
edition = "2021"

[dependencies]
csv = "1.3.0"
serde = { version = "1.0.213", features = ["derive"] }
StringRecord(["apple", "red", "no"])
StringRecord(["banana", "yellow", "yes"])
StringRecord(["mango", "green-yellow-red", "yes"])

StringRecord(["apple", "red", "no"])
StringRecord(["banana", "yellow", "yes"])
StringRecord(["mango", "green-yellow-red", "yes"])
Error parsing csv

StringRecord(["apple", "red", "no"])
StringRecord(["banana", "yellow", "yes"])
StringRecord(["mango", "green-yellow-red", "yes"])
Error parsing csv CSV error: record 4 (line: 6, byte: 76): found record with 2 fields, but the previous record has 3 fields

Fruit { fruit: "apple", color: "red", peal: "no" }
Fruit { fruit: "banana", color: "yellow", peal: "yes" }
Fruit { fruit: "mango", color: "green-yellow-red", peal: "yes" }

Read CSV file as a vector of StringRecords

  • StringRecord

  • csv

  • StringRecord

  • We read the rows (skipping the first row)

  • We can iterate over the rows or access the individual elements

Planet name,Distance (AU),Mass
Mercury,0.4,0.055
Venus,0.7,0.815
Earth,1,1
Mars,1.5,0.107
Ceres,2.77,0.00015
Jupiter,5.2,318
Saturn,9.5,95
Uranus,19.6,14
Neptune,30,17
Pluto,39,0.00218
Charon,39,0.000254
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
csv = "1.2.2"
use std::fs::File;

/// Loads `planets.csv` and shows three ways of accessing the parsed rows:
/// printing every row with its first field, iterating over the fields of the
/// first row, and indexing a single cell directly.
///
/// Indexing panics if the file has fewer rows/fields than accessed here.
fn main() {
    let rows = read_file("planets.csv");

    // Every row: the whole record, then just its first field.
    rows.iter().for_each(|record| {
        println!("{record:?}");
        println!("{}", &record[0]);
    });
    println!("------");

    // Every field of the first row.
    let first_row = &rows[0];
    for field in first_row {
        println!("{field}");
    }
    println!("------");

    // A single cell: first field of the fourth row.
    println!("{}", &rows[3][0]);
}

/// Reads the CSV file at `filepath` and returns all of its data rows.
///
/// The reader is created with `csv::Reader::from_reader`, so the first row of
/// the file is consumed as the header and is not part of the returned vector.
///
/// # Panics
///
/// Panics with `Error opening file <path>: <cause>` if the file cannot be
/// opened, and with `Error <cause>` on the first row that fails to parse.
fn read_file(filepath: &str) -> Vec<csv::StringRecord> {
    let file = File::open(filepath)
        .unwrap_or_else(|error| panic!("Error opening file {}: {}", filepath, error));
    let mut rdr = csv::Reader::from_reader(file);

    // Each record yielded by the iterator is already owned, so it is moved
    // into the vector directly instead of being cloned first.
    let mut rows = Vec::new();
    rows.extend(
        rdr.records()
            .map(|result| result.unwrap_or_else(|err| panic!("Error {}", err))),
    );
    rows
}
StringRecord(["Mercury", "0.4", "0.055"])
Mercury
StringRecord(["Venus", "0.7", "0.815"])
Venus
StringRecord(["Earth", "1", "1"])
Earth
StringRecord(["Mars", "1.5", "0.107"])
Mars
StringRecord(["Ceres", "2.77", "0.00015"])
Ceres
StringRecord(["Jupiter", "5.2", "318"])
Jupiter
StringRecord(["Saturn", "9.5", "95"])
Saturn
StringRecord(["Uranus", "19.6", "14"])
Uranus
StringRecord(["Neptune", "30", "17"])
Neptune
StringRecord(["Pluto", "39", "0.00218"])
Pluto
StringRecord(["Charon", "39", "0.000254"])
Charon
------
Mercury
0.4
0.055
------
Mars

Read CSV file into hashes

  • HashMap
Planet name,Distance (AU),Mass
Mercury,0.4,0.055
Venus,0.7,0.815
Earth,1,1
Mars,1.5,0.107
Ceres,2.77,0.00015
Jupiter,5.2,318
Saturn,9.5,95
Uranus,19.6,14
Neptune,30,17
Pluto,39,0.00218
Charon,39,0.000254
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
csv = "1.2.2"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.97"
use std::collections::HashMap;
use std::error::Error;
use std::fs::File;

/// One CSV row as a map; `main` looks values up by column title (e.g. `"Planet name"`).
type Record = HashMap<String, String>;

/// Loads `planets.csv` into a vector of maps, prints every row together with
/// its `Planet name` value, and finally prints the name found in the fourth row.
///
/// Panics with `Error: <cause>` if reading fails, and on out-of-range or
/// missing-key lookups.
fn main() {
    let rows = match read_file("planets.csv") {
        Ok(rows) => rows,
        Err(err) => panic!("Error: {}", err),
    };

    for record in &rows {
        println!("{record:?}");
        println!("{}", record["Planet name"]);
    }
    println!("---");
    println!("{}", rows[3]["Planet name"]);
}

fn read_file(filepath: &str) -> Result<Vec<Record>, Box<dyn Error>> {
    let mut records: Vec<Record> = vec![];
    match File::open(filepath) {
        Ok(file) => {
            let mut rdr = csv::Reader::from_reader(file);
            for result in rdr.deserialize() {
                let record: Record = result?;
                records.push(record);
            }
        }
        Err(error) => panic!("Error opening file {}: {}", filepath, error),
    }
    Ok(records)
}
{"Mass": "0.055", "Planet name": "Mercury", "Distance (AU)": "0.4"}
Mercury
{"Distance (AU)": "0.7", "Planet name": "Venus", "Mass": "0.815"}
Venus
{"Planet name": "Earth", "Distance (AU)": "1", "Mass": "1"}
Earth
{"Mass": "0.107", "Distance (AU)": "1.5", "Planet name": "Mars"}
Mars
{"Planet name": "Ceres", "Distance (AU)": "2.77", "Mass": "0.00015"}
Ceres
{"Distance (AU)": "5.2", "Mass": "318", "Planet name": "Jupiter"}
Jupiter
{"Distance (AU)": "9.5", "Planet name": "Saturn", "Mass": "95"}
Saturn
{"Planet name": "Uranus", "Mass": "14", "Distance (AU)": "19.6"}
Uranus
{"Planet name": "Neptune", "Mass": "17", "Distance (AU)": "30"}
Neptune
{"Distance (AU)": "39", "Mass": "0.00218", "Planet name": "Pluto"}
Pluto
{"Planet name": "Charon", "Mass": "0.000254", "Distance (AU)": "39"}
Charon
---
Mars

Read CSV file as structs

  • struct
Planet name,Distance (AU),Mass
Mercury,0.4,0.055
Venus,0.7,0.815
Earth,1,1
Mars,1.5,0.107
Ceres,2.77,0.00015
Jupiter,5.2,318
Saturn,9.5,95
Uranus,19.6,14
Neptune,30,17
Pluto,39,0.00218
Charon,39,0.000254
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
csv = "1.2.2"
serde = { version = "1.0", features = ["derive"] }
use std::error::Error;
use std::fs::File;

/// One planet row of the CSV file. The column titles contain spaces and
/// parentheses, so each field is renamed to the exact title it is read from.
#[derive(Debug, serde::Deserialize)]
struct Record {
    /// Read from the `Planet name` column.
    #[serde(rename = "Planet name")]
    name: String,

    /// Read from the `Distance (AU)` column and parsed as a float.
    #[serde(rename = "Distance (AU)")]
    distance: f32,

    /// Read from the `Mass` column and parsed as a float.
    #[serde(rename = "Mass")]
    mass: f32,
}

fn main() {
    let filepath = "planets.csv";
    let result = read_file(filepath);
    match result {
        Ok(rows) => {
            for row in &rows {
                println!("{:?}", row);
            }
            println!("---");
            println!("{}", rows[3].name);
            println!("{}", rows[3].distance);
            println!("{}", rows[3].mass);
        }
        Err(err) => panic!("Error: {}", err),
    }
}

fn read_file(filepath: &str) -> Result<Vec<Record>, Box<dyn Error>> {
    let mut records: Vec<Record> = vec![];
    match File::open(filepath) {
        Ok(file) => {
            let mut rdr = csv::Reader::from_reader(file);
            for result in rdr.deserialize() {
                let record: Record = result?;
                records.push(record);
            }
        }
        Err(error) => panic!("Error opening file {}: {}", filepath, error),
    }
    Ok(records)
}
Record { name: "Mercury", distance: 0.4, mass: 0.055 }
Record { name: "Venus", distance: 0.7, mass: 0.815 }
Record { name: "Earth", distance: 1.0, mass: 1.0 }
Record { name: "Mars", distance: 1.5, mass: 0.107 }
Record { name: "Ceres", distance: 2.77, mass: 0.00015 }
Record { name: "Jupiter", distance: 5.2, mass: 318.0 }
Record { name: "Saturn", distance: 9.5, mass: 95.0 }
Record { name: "Uranus", distance: 19.6, mass: 14.0 }
Record { name: "Neptune", distance: 30.0, mass: 17.0 }
Record { name: "Pluto", distance: 39.0, mass: 0.00218 }
Record { name: "Charon", distance: 39.0, mass: 0.000254 }
---
Mars
1.5
0.107

Read CSV to struct, add extra fields

[package]
name = "handle-csv"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
csv = "1.2.2"
serde = { version = "1.0", features = ["derive"] }
Record { name: "Mercury", distance: 0.4, mass: 0.055, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Venus", distance: 0.7, mass: 0.815, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Earth", distance: 1.0, mass: 1.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Mars", distance: 1.5, mass: 0.107, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Ceres", distance: 2.77, mass: 0.00015, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Jupiter", distance: 5.2, mass: 318.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Saturn", distance: 9.5, mass: 95.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Uranus", distance: 19.6, mass: 14.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Neptune", distance: 30.0, mass: 17.0, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Pluto", distance: 39.0, mass: 0.00218, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
Record { name: "Charon", distance: 39.0, mass: 0.000254, text: "abc", float: 0.0, person: Person { name: "", birth_year: 0 } }
---
Mars
1.5
0.107
abc
0

0
Planet name,Distance (AU),Mass
Mercury,0.4,0.055
Venus,0.7,0.815
Earth,1,1
Mars,1.5,0.107
Ceres,2.77,0.00015
Jupiter,5.2,318
Saturn,9.5,95
Uranus,19.6,14
Neptune,30,17
Pluto,39,0.00218
Charon,39,0.000254
use std::error::Error;
use std::fs::File;

/// Nested value stored in `Record::person`. The CSV has no columns for it;
/// `get_person` supplies the instance used as the default.
#[derive(Debug, serde::Deserialize)]
struct Person {
    /// Person's name (empty in the default produced by `get_person`).
    name: String,
    /// Year of birth (0 in the default produced by `get_person`).
    birth_year: u16,
}

/// One planet row of the CSV file plus three extra fields that have no
/// matching column and are therefore filled in by the named default functions.
#[derive(Debug, serde::Deserialize)]
struct Record {
    /// Read from the `Planet name` column.
    #[serde(rename = "Planet name")]
    name: String,

    /// Read from the `Distance (AU)` column and parsed as a float.
    #[serde(rename = "Distance (AU)")]
    distance: f32,

    /// Read from the `Mass` column and parsed as a float.
    #[serde(rename = "Mass")]
    mass: f32,

    /// Extra field; falls back to the value returned by `get_text`.
    #[serde(default = "get_text")]
    text: String,

    /// Extra field; falls back to the value returned by `get_zero`.
    #[serde(default = "get_zero")]
    float: f32,

    /// Extra field; falls back to the value returned by `get_person`.
    #[serde(default = "get_person")]
    person: Person,
}
/// Default for `Record::text`: the fixed string `"abc"`.
fn get_text() -> String {
    "abc".to_owned()
}
/// Default for `Record::float`: zero.
fn get_zero() -> f32 {
    0_f32
}
/// Default for `Record::person`: a `Person` with an empty name and birth year 0.
fn get_person() -> Person {
    let name = String::new();
    let birth_year = 0;
    Person { name, birth_year }
}

fn main() {
    let filepath = "planets.csv";
    let result = read_file(filepath);
    match result {
        Ok(rows) => {
            for row in &rows {
                println!("{:?}", row);
            }
            println!("---");
            println!("{}", rows[3].name);
            println!("{}", rows[3].distance);
            println!("{}", rows[3].mass);
            println!("{}", rows[3].text);
            println!("{}", rows[3].float);
            println!("{}", rows[3].person.name);
            println!("{}", rows[3].person.birth_year);
        }
        Err(err) => panic!("Error: {}", err),
    }
}

fn read_file(filepath: &str) -> Result<Vec<Record>, Box<dyn Error>> {
    let mut records: Vec<Record> = vec![];
    match File::open(filepath) {
        Ok(file) => {
            let mut rdr = csv::Reader::from_reader(file);
            for result in rdr.deserialize() {
                let record: Record = result?;
                records.push(record);
            }
        }
        Err(error) => panic!("Error opening file {}: {}", filepath, error),
    }
    Ok(records)
}

Read CSV and remove (trim) whitespace from around the values

  • serde

  • alias

  • Deserialize

  • ReaderBuilder

  • Sometimes we have a CSV file where the data is aligned nicely using spaces to pad the values. (Most likely a manually maintained CSV file).

  • We can tell the CSV reader to trim those whitespaces.

  • In this example we also used the alias attribute of serde to map the real titles in the CSV file to the fieldnames of the struct that can use a much more limited set of characters.

Planet name,  Distance (AU),  Mass
Mercury,      0.4,            0.055
Venus,        0.7,            0.815
Earth,        1,              1
Mars,         1.5,            0.107
Tesla
Ceres,        2.77,           0.00015
Jupiter,      5.2,            318
Saturn,       9.5,            95
Uranus,       19.6,           14
Neptune,      30,             17
Pluto,        39,             0.00218
Charon,       39,             0.000254
/// One planet row of the padded CSV file. All values are kept as strings;
/// the `alias` attributes map the human-readable column titles onto the
/// struct's field names.
#[derive(Debug, serde::Deserialize, serde::Serialize)]
struct Planet {
    /// Filled from the column titled `Planet name`.
    #[serde(alias = "Planet name")]
    name: String,

    /// Filled from the column titled `Distance (AU)`.
    #[serde(alias = "Distance (AU)")]
    distance: String,

    /// Filled from the column titled `Mass`.
    #[serde(alias = "Mass")]
    mass: String,
}

/// Reads `planets.csv` three times, once with each helper, separating the
/// outputs with an empty line:
///
/// 1. parse and print inside the helper,
/// 2. get back every per-row `Result` and print rows and errors here,
/// 3. get back only the successfully parsed planets.
fn main() {
    let filepath = "planets.csv";

    read_and_print_file(filepath);
    println!();

    read_file_return_results(filepath)
        .into_iter()
        .for_each(|outcome| match outcome {
            Ok(planet) => println!("{planet:?}"),
            Err(err) => println!("Error parsing csv {err}"),
        });
    println!();

    read_file_return_planets(filepath)
        .iter()
        .for_each(|planet| println!("{planet:?}"));
}

/// Reads the whole file at `filepath`, parses it as CSV with a header row and
/// with whitespace trimmed from headers and fields, and prints every row:
/// the `Planet` on success, the parser's error message otherwise.
///
/// # Panics
///
/// Panics with `Error reading file` if the file cannot be read.
fn read_and_print_file(filepath: &str) {
    let contents = std::fs::read_to_string(filepath).expect("Error reading file");

    let mut builder = csv::ReaderBuilder::new();
    builder.has_headers(true).trim(csv::Trim::All);
    let mut reader = builder.from_reader(contents.as_bytes());

    reader
        .deserialize::<Planet>()
        .for_each(|outcome| match outcome {
            Ok(planet) => println!("{:?}", planet),
            Err(problem) => println!("Error parsing csv {problem}"),
        });
}

/// Reads the whole file at `filepath`, parses it as CSV with a header row and
/// with whitespace trimmed from headers and fields, and returns one `Result`
/// per data row so the caller can decide how to treat rows that failed.
///
/// # Panics
///
/// Panics with `Error reading file` if the file cannot be read.
fn read_file_return_results(filepath: &str) -> Vec<Result<Planet, csv::Error>> {
    let contents = std::fs::read_to_string(filepath).expect("Error reading file");

    let mut builder = csv::ReaderBuilder::new();
    builder.has_headers(true).trim(csv::Trim::All);
    let mut reader = builder.from_reader(contents.as_bytes());

    let mut outcomes = Vec::new();
    outcomes.extend(reader.deserialize::<Planet>());
    outcomes
}

/// Reads the whole file at `filepath`, parses it as CSV with a header row and
/// with whitespace trimmed from headers and fields, and returns only the rows
/// that deserialized successfully; rows with errors are silently skipped.
///
/// # Panics
///
/// Panics with `Error reading file` if the file cannot be read.
fn read_file_return_planets(filepath: &str) -> Vec<Planet> {
    let contents = std::fs::read_to_string(filepath).expect("Error reading file");

    let mut builder = csv::ReaderBuilder::new();
    builder.has_headers(true).trim(csv::Trim::All);
    let mut reader = builder.from_reader(contents.as_bytes());

    let mut planets = Vec::new();
    planets.extend(reader.deserialize::<Planet>().filter_map(Result::ok));
    planets
}
[package]
name = "handle-csv"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
csv = "1.2.2"
serde = { version = "1.0.213", features = ["derive"] }
Planet { name: "Mercury", distance: "0.4", mass: "0.055" }
Planet { name: "Venus", distance: "0.7", mass: "0.815" }
Planet { name: "Earth", distance: "1", mass: "1" }
Planet { name: "Mars", distance: "1.5", mass: "0.107" }
Error parsing csv CSV error: record 5 (line: 6, byte: 175): found record with 1 fields, but the previous record has 3 fields
Planet { name: "Ceres", distance: "2.77", mass: "0.00015" }
Planet { name: "Jupiter", distance: "5.2", mass: "318" }
Planet { name: "Saturn", distance: "9.5", mass: "95" }
Planet { name: "Uranus", distance: "19.6", mass: "14" }
Planet { name: "Neptune", distance: "30", mass: "17" }
Planet { name: "Pluto", distance: "39", mass: "0.00218" }
Planet { name: "Charon", distance: "39", mass: "0.000254" }

Planet { name: "Mercury", distance: "0.4", mass: "0.055" }
Planet { name: "Venus", distance: "0.7", mass: "0.815" }
Planet { name: "Earth", distance: "1", mass: "1" }
Planet { name: "Mars", distance: "1.5", mass: "0.107" }
Error parsing csv CSV error: record 5 (line: 6, byte: 175): found record with 1 fields, but the previous record has 3 fields
Planet { name: "Ceres", distance: "2.77", mass: "0.00015" }
Planet { name: "Jupiter", distance: "5.2", mass: "318" }
Planet { name: "Saturn", distance: "9.5", mass: "95" }
Planet { name: "Uranus", distance: "19.6", mass: "14" }
Planet { name: "Neptune", distance: "30", mass: "17" }
Planet { name: "Pluto", distance: "39", mass: "0.00218" }
Planet { name: "Charon", distance: "39", mass: "0.000254" }

Planet { name: "Mercury", distance: "0.4", mass: "0.055" }
Planet { name: "Venus", distance: "0.7", mass: "0.815" }
Planet { name: "Earth", distance: "1", mass: "1" }
Planet { name: "Mars", distance: "1.5", mass: "0.107" }
Planet { name: "Ceres", distance: "2.77", mass: "0.00015" }
Planet { name: "Jupiter", distance: "5.2", mass: "318" }
Planet { name: "Saturn", distance: "9.5", mass: "95" }
Planet { name: "Uranus", distance: "19.6", mass: "14" }
Planet { name: "Neptune", distance: "30", mass: "17" }
Planet { name: "Pluto", distance: "39", mass: "0.00218" }
Planet { name: "Charon", distance: "39", mass: "0.000254" }