From 197e4592b94d56d92d3280cf04555ea9d1eb301f Mon Sep 17 00:00:00 2001 From: Johann150 Date: Fri, 12 Feb 2021 16:51:42 +0100 Subject: [PATCH] use configparser crate This parser can correctly read globs from configuration keys which allows their use not just in theory in the server logic but in the config file too. --- CHANGELOG.md | 2 +- Cargo.lock | 49 +++++------- Cargo.toml | 4 +- README.md | 14 ++-- src/metadata.rs | 204 ++++++++++++++++++++++-------------------------- 5 files changed, 126 insertions(+), 147 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9644bc9..76a006d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ Thank you to @gegeweb for contributing to this release. * The `.meta` configuration file now allows for globs to be used. ### Changed -* The configuration files are now parsed as YAML. The syntax only changes in that a space is now required behind the colon. +* The `.meta` file parser now uses the `configparser` crate. The syntax does not change. * The changelog is now also kept in this file in addition to the GitHub releases. * Certificate chain and key file are now only loaded once at startup, certificate changes need a restart to take effect. * Hidden files are now served if there is an explicit setting in a `.meta` file for them, regardless of the `--serve-secret` flag. 
diff --git a/Cargo.lock b/Cargo.lock index e7e5578..7336c08 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,6 +4,7 @@ name = "agate" version = "2.4.1" dependencies = [ + "configparser", "env_logger", "getopts", "glob", @@ -15,7 +16,6 @@ dependencies = [ "tokio", "tokio-rustls", "url", - "yaml-rust", ] [[package]] @@ -66,10 +66,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "env_logger" -version = "0.8.2" +name = "configparser" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26ecb66b4bdca6c1409b40fb255eefc2bd4f6d135dab3c3124f80ffa2a9661e" +checksum = "e2616d8c1fbf887d76dd8e067ec1bc3be7669994378428b4415a8e4ad57baae1" + +[[package]] +name = "env_logger" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17392a012ea30ef05a610aa97dfb49496e71c9f676b27879922ea5bdf60d9d3f" dependencies = [ "atty", "humantime", @@ -119,9 +125,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "idna" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2673c30ee86b5b96a9cb52ad15718aa1f966f5ab9ad54a8b95d5ca33120a9" +checksum = "de910d521f7cc3135c4de8db1cb910e0b5ed1dc6f57c381cd07e8e661ce10094" dependencies = [ "matches", "unicode-bidi", @@ -145,15 +151,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.85" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ccac4b00700875e6a07c6cde370d44d32fa01c5a65cdd2fca6858c479d28bb3" - -[[package]] -name = "linked-hash-map" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" +checksum = 
"b7282d924be3275cec7f6756ff4121987bc6481325397dde6ba3e7802b1a8b1c" [[package]] name = "log" @@ -263,9 +263,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" dependencies = [ "proc-macro2", ] @@ -362,9 +362,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6714d663090b6b0acb0fa85841c6d66233d150cdb2602c8f9b8abb03370beb3f" +checksum = "e8190d04c665ea9e6b6a0dc45523ade572c088d2e6566244c1122671dbf4ae3a" dependencies = [ "autocfg", "bytes", @@ -406,9 +406,9 @@ dependencies = [ [[package]] name = "unicode-normalization" -version = "0.1.16" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a13e63ab62dbe32aeee58d1c5408d35c36c392bba5d9d3142287219721afe606" +checksum = "07fbfce1c8a97d547e8b5334978438d9d6ec8c20e38f56d4a4374d181493eaef" dependencies = [ "tinyvec", ] @@ -553,12 +553,3 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "yaml-rust" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" -dependencies = [ - "linked-hash-map", -] diff --git a/Cargo.toml b/Cargo.toml index ce030ee..d33145f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ exclude = ["/tools", ".github/", "release.sh", "/content"] [dependencies] tokio-rustls = "0.22.0" -tokio = { version = "1.1", features = ["fs", "io-util", "net", 
"rt-multi-thread", "sync"] } +tokio = { version = "1.2", features = ["fs", "io-util", "net", "rt-multi-thread", "sync"] } env_logger = { version = "0.8", default-features = false, features = ["atty", "humantime", "termcolor"] } getopts = "0.2.21" log = "0.4" @@ -22,8 +22,8 @@ once_cell = "1.5" percent-encoding = "2.1" rustls = "0.19.0" url = "2.2" -yaml-rust = "0.4" glob = "0.3" +configparser = "2.0" [profile.release] lto = true diff --git a/README.md b/README.md index dd59da6..a2904c1 100644 --- a/README.md +++ b/README.md @@ -68,19 +68,19 @@ A file called `index.gmi` will always take precedence over a directory listing. You can put a file called `.meta` in any content directory. This file stores some metadata about the adjacent files which Agate will use when serving these files. The `.meta` file must be UTF-8 encoded. You can also enable a central configuration file with the `-C` flag (or the long version `--central-conf`). In this case Agate will always look for the `.meta` configuration file in the content root directory and will ignore `.meta` files in other directories. -The `.meta` file is parsed as a YAML file and should contain a "hash" datatype with file names as the keys. This means: -* Lines starting with a `#` are comments and will be ignored, as will empty lines. -* All other lines must have the form `: :`, i.e. start with a file path, followed by a colon and then the metadata. `` is a case sensitive file path, which may or may not exist on disk. If leads to a directory, it is ignored. -If central configuration file mode is not used, using a path that is not a file in the current directory is undefined behaviour (for example: `../index.gmi` would be undefined behaviour). +If central configuration file mode is not used, using a path that is not a file in the current directory is undefined behaviour (for example `../index.gmi` would be undefined behaviour). You can use Unix style patterns in existing paths. 
For example `content/*` will match any file within `content`, and `content/**` will additionally match any files in subdirectories of `content`. -However, the `*` and `**` globs on their own will by default not match files or directories that start with a dot because of their special meaning (see Directory listing). +However, the `*` and `**` globs on their own will by default not match files or directories that start with a dot because of their special meaning. This behaviour can be disabled with `--serve-secret` or by explicitly matching files starting with a dot with e.g. `content/.*` or `content/**/.*` respectively. For more information on the patterns you can use, please see the [documentation of `glob::Pattern`](https://https://docs.rs/glob/0.3.0/glob/struct.Pattern.html). Rules can overwrite other rules, so if a file is matched by multiple rules, the last one applies. -The metadata can take one of four possible forms: +`<metadata>` can take one of four possible forms: 1. empty Agate will not send a default language parameter, even if it was specified on the command line. 2. starting with a semicolon followed by MIME parameters @@ -112,6 +112,8 @@ requested filename|response header any non-hidden file ending in `.de.gmi` (including in non-hidden subdirectories)|`20 text/gemini;lang=de` any non-hidden file in the `nl` directory ending in `.gmi` (including in non-hidden subdirectories)|`20 text/gemini;lang=nl` +(*1) In theory the syntax is that of a typical INI-like file and also allows for sections with `[section]` (the default section is set to `mime` in the parser). Since all other sections are disregarded, this does not make a difference. This also means that you can in theory also use `=` instead of `:`. For even more information, you can visit the [documentation of `configparser`](https://docs.rs/configparser/2.0). 
+ ### Logging Verbosity Agate uses the `env_logger` crate and allows you to set the logging verbosity by setting the default `RUST_LOG` environment variable. For more information, please see the [documentation of `env_logger`]. diff --git a/src/metadata.rs b/src/metadata.rs index 4edb850..066e23b 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -1,8 +1,8 @@ +use configparser::ini::Ini; use glob::{glob_with, MatchOptions}; use std::collections::BTreeMap; use std::path::{Path, PathBuf}; use std::time::SystemTime; -use yaml_rust::YamlLoader; static SIDECAR_FILENAME: &str = ".meta"; @@ -78,7 +78,7 @@ impl FileOptions { }; db.push(SIDECAR_FILENAME); - let should_read = if let Ok(metadata) = db.as_path().metadata() { + let should_read = if let Ok(metadata) = db.metadata() { if !metadata.is_file() { // it exists, but it is a directory false @@ -109,122 +109,108 @@ impl FileOptions { fn read_database(&mut self, db: &PathBuf) { log::trace!("reading database {:?}", db); - if let Ok(contents) = std::fs::read_to_string(db) { - self.databases_read - .insert(db.to_path_buf(), SystemTime::now()); + let mut ini = Ini::new_cs(); + ini.set_default_section("mime"); + let map = ini + .load(db.to_str().expect("config path not UTF-8")) + .and_then(|mut sections| { + sections + .remove("mime") + .ok_or_else(|| "no \"mime\" or default section".to_string()) + }); + self.databases_read + .insert(db.to_path_buf(), SystemTime::now()); + let files = match map { + Ok(section) => section, + Err(err) => { + log::error!("invalid config file {:?}: {}", db, err); + return; + } + }; - let docs = match YamlLoader::load_from_str(&contents) { - Ok(docs) => docs, - Err(e) => { - log::error!("Invalid YAML document in {:?}: {}", db, e); + for (rel_path, header) in files { + // treat unassigned keys as if they had an empty value + let header = header.unwrap_or_default(); + + // generate workspace-relative path + let mut path = db.clone(); + path.pop(); + path.push(rel_path); + + // parse the preset + 
let preset = if header.is_empty() || header.starts_with(';') { + PresetMeta::Parameters(header.to_string()) + } else if matches!(header.chars().next(), Some('1'..='6')) { + if header.len() < 3 + || !header.chars().nth(1).unwrap().is_ascii_digit() + || !header.chars().nth(2).unwrap().is_whitespace() + { + log::error!("Line for {:?} starts like a full header line, but it is incorrect; ignoring it.", path); return; } + let separator = header.chars().nth(2).unwrap(); + if separator != ' ' { + // the Gemini specification says that the third + // character has to be a space, so correct any + // other whitespace to it (e.g. tabs) + log::warn!("Full Header line for {:?} has an invalid character, treating {:?} as a space.", path, separator); + } + let status = header + .chars() + .take(2) + .collect::() + .parse::() + // unwrap since we alread checked it's a number + .unwrap(); + // not taking a slice here because the separator + // might be a whitespace wider than a byte + let meta = header.chars().skip(3).collect::(); + PresetMeta::FullHeader(status, meta) + } else { + // must be a MIME type, but without status code + PresetMeta::FullMime(header.to_string()) }; - if let Some(files) = docs.get(0).and_then(|hash| hash.as_hash()) { - for (rel_path, header) in files { - // from YAML to Rust types - let rel_path = if let Some(rel_path) = rel_path.as_str() { - rel_path - } else { - log::error!( - "Expected string filename, but got {:?} in {:?}", - rel_path, - db - ); + + let glob_options = MatchOptions { + case_sensitive: true, + // so there is a difference between "*" and "**". + require_literal_separator: true, + // security measure because entries for .hidden files + // would result in them being exposed. 
+ require_literal_leading_dot: !crate::ARGS.serve_secret, + }; + + // process filename as glob + let paths = if let Some(path) = path.to_str() { + match glob_with(path, glob_options) { + Ok(paths) => paths.collect::>(), + Err(err) => { + log::error!("incorrect glob pattern in {:?}: {}", path, err); continue; - }; - let header = if let Some(header) = header.as_str() { - header - } else { - log::error!("Expected string contents, but got {:?} in {:?}", header, db); - continue; - }; - - // generate workspace-relative path - let mut path = db.clone(); - path.pop(); - path.push(rel_path); - - // parse the preset - let preset = if header.is_empty() || header.starts_with(';') { - PresetMeta::Parameters(header.to_string()) - } else if matches!(header.chars().next(), Some('1'..='6')) { - if header.len() < 3 - || !header.chars().nth(1).unwrap().is_ascii_digit() - || !header.chars().nth(2).unwrap().is_whitespace() - { - log::error!("Line for {:?} starts like a full header line, but it is incorrect; ignoring it.", path); - return; - } - let separator = header.chars().nth(2).unwrap(); - if separator != ' ' { - // the Gemini specification says that the third - // character has to be a space, so correct any - // other whitespace to it (e.g. tabs) - log::warn!("Full Header line for {:?} has an invalid character, treating {:?} as a space.", path, separator); - } - let status = header - .chars() - .take(2) - .collect::() - .parse::() - // unwrap since we alread checked it's a number - .unwrap(); - // not taking a slice here because the separator - // might be a whitespace wider than a byte - let meta = header.chars().skip(3).collect::(); - PresetMeta::FullHeader(status, meta) - } else { - // must be a MIME type, but without status code - PresetMeta::FullMime(header.to_string()) - }; - - let glob_options = MatchOptions { - case_sensitive: true, - // so there is a difference between "*" and "**". 
- require_literal_separator: true, - // security measure because entries for .hidden files - // would result in them being exposed. - require_literal_leading_dot: !crate::ARGS.serve_secret, - }; - - // process filename as glob - let paths = if let Some(path) = path.to_str() { - match glob_with(path, glob_options) { - Ok(paths) => paths.collect::>(), - Err(err) => { - log::error!("incorrect glob pattern: {}", err); - continue; - } - } - } else { - log::error!("path is not UTF-8: {:?}", path); - continue; - }; - - if paths.is_empty() { - // probably an entry for a nonexistent file, glob only works for existing files - self.file_meta.insert(path, preset); - } else { - for glob_result in paths { - match glob_result { - Ok(path) if path.is_dir() => { /* ignore */ } - Ok(path) => { - self.file_meta.insert(path, preset.clone()); - } - Err(err) => { - log::warn!("could not process glob path: {}", err); - continue; - } - }; - } } } } else { - log::error!("no YAML document {:?}", db); + log::error!("path is not UTF-8: {:?}", path); + continue; + }; + + if paths.is_empty() { + // probably an entry for a nonexistent file, glob only works for existing files + self.file_meta.insert(path, preset); + } else { + for glob_result in paths { + match glob_result { + Ok(path) if path.is_dir() => { /* ignore */ } + Ok(path) => { + self.file_meta.insert(path, preset.clone()); + } + Err(err) => { + log::warn!("could not process glob path: {}", err); + continue; + } + }; + } } - } else { - log::error!("could not read configuration file {:?}", db); } }