Skip to content

Commit

Permalink
Merge pull request #505 from Oxen-AI/fix/parsing_urls
Browse files Browse the repository at this point in the history
fix broken url decoding
  • Loading branch information
EloyMartinez authored Jan 14, 2025
2 parents 312bb4a + 386b94a commit 3e601e7
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 48 deletions.
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ mp4 = "0.14.0"
minus = { version = "5.4.0", features = ["static_output", "search"] }
nom = "7.1.3"
num_cpus = "1.16.0"
percent-encoding = "2.1"
pluralizer = "0.4.0"
polars = { version = "0.44.2", features = [
"lazy",
Expand Down
74 changes: 73 additions & 1 deletion src/lib/src/api/client/dir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::error::OxenError;
use crate::model::metadata::generic_metadata::GenericMetadata;
use crate::model::metadata::MetadataDir;
use crate::model::RemoteRepository;
use crate::view::PaginatedDirEntries;
use crate::view::{PaginatedDirEntries, PaginatedDirEntriesResponse};

pub async fn list_root(remote_repo: &RemoteRepository) -> Result<PaginatedDirEntries, OxenError> {
list(
Expand Down Expand Up @@ -64,6 +64,29 @@ pub async fn file_counts(
}
}

pub async fn get_dir(
remote_repo: &RemoteRepository,
revision: impl AsRef<str>,
path: impl AsRef<Path>,
) -> Result<PaginatedDirEntriesResponse, OxenError> {
let path_str = path.as_ref().to_string_lossy();
let revision = revision.as_ref();
let uri = format!("/dir/{revision}/{path_str}");
let url = api::endpoint::url_from_repo(remote_repo, &uri)?;

let client = client::new_for_url(&url)?;
let res = client.get(&url).send().await?;
let body = client::parse_json_body(&url, res).await?;
let response: Result<PaginatedDirEntriesResponse, serde_json::Error> =
serde_json::from_str(&body);
match response {
Ok(val) => Ok(val),
Err(err) => Err(OxenError::basic_str(format!(
"api::dir::get_dir error parsing response from {url}\n\nErr {err:?} \n\n{body}"
))),
}
}

#[cfg(test)]
mod tests {
use crate::api;
Expand All @@ -75,6 +98,7 @@ mod tests {
use crate::repositories;
use crate::test;
use crate::util;
use crate::view::StatusMessage;

use std::path::Path;

Expand Down Expand Up @@ -303,4 +327,52 @@ mod tests {
})
.await
}

#[tokio::test]
async fn test_get_dir_encoding() -> Result<(), OxenError> {
test::run_readme_remote_repo_test(|local_repo, remote_repo| async move {
let mut local_repo = local_repo;

command::config::set_remote(
&mut local_repo,
constants::DEFAULT_REMOTE_NAME,
&remote_repo.remote.url,
)?;
let repo_path = local_repo.path.join("dir=dir");
util::fs::create_dir_all(&repo_path)?;
let file_path = repo_path.join("file example.txt");
util::fs::write_to_path(&file_path, "Hello World")?;
repositories::add(&local_repo, &file_path)?;
repositories::commit(&local_repo, "Adding README")?;
repositories::push(&local_repo).await?;

let dir_response =
api::client::dir::get_dir(&remote_repo, DEFAULT_BRANCH_NAME, "dir=dir").await?;
assert_eq!(dir_response.status.status, "success");

// Assert the directory is present and named "dir=dir"
if let Some(ref dir) = dir_response.entries.dir {
assert_eq!(dir.filename, "dir=dir");
assert!(dir.is_dir);
} else {
panic!("Directory 'dir=dir' not found");
}

// Assert the file "file example.txt" is present in the entries
let file_entry = dir_response
.entries
.entries
.iter()
.find(|entry| entry.filename == "file example.txt");
match file_entry {
Some(file) => {
assert_eq!(file.filename, "file example.txt");
assert!(!file.is_dir);
}
None => panic!("File 'file example.txt' not found"),
}
Ok(remote_repo)
})
.await
}
}
43 changes: 4 additions & 39 deletions src/lib/src/util/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1357,48 +1357,13 @@ pub fn path_relative_to_dir(
path: impl AsRef<Path>,
dir: impl AsRef<Path>,
) -> Result<PathBuf, OxenError> {
// Embedded canonicalize calls to catch every case:
// -- if both paths can be canonicalized, they both will be
// -- if only the dir can be canonicalized, only the dir will be
// -- if the dir cannot be canonicalized, neither will be
let (path, dir) = match canonicalize(&dir) {
Ok(canon_dir) => {
/*log::debug!(
"dir {:?} canonicalized. Checking path {:?}",
dir.as_ref(),
path.as_ref()
);*/
match canonicalize(&path) {
Ok(canon_path) => (canon_path, canon_dir),
// '_' because the debug statement is commented out
Err(_err) => {
/*log::debug!(
"Err with canonicalization: {err:?}. Returning path {:?} immediately",
path.as_ref()
);*/
return Ok(path.as_ref().to_path_buf());
}
}
}
// '_' because the debug statement is commented out
Err(_err) => {
/*log::debug!(
"Err with canonicalization: {err:?}. Skipping canonicalization of path {:?}",
path.as_ref()
);*/
(path.as_ref().to_path_buf(), dir.as_ref().to_path_buf())
}
};
let path = path.as_ref();
let dir = dir.as_ref();

let mut mut_path = path.clone();
let mut mut_path = path.to_path_buf();
let mut components: Vec<PathBuf> = vec![];
while mut_path.parent().is_some() {
/*log::debug!(
"Comparing {:?} => {:?} => {:?}",
path,
mut_path.parent(),
dir
);*/
// println!("Comparing {:?} => {:?} => {:?}", path, mut_path.parent(), dir);
if let Some(filename) = mut_path.file_name() {
if mut_path != dir {
components.push(PathBuf::from(filename));
Expand Down
2 changes: 1 addition & 1 deletion src/server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ edition = "2021"
actix-files = "0.6.0"
actix-http = "3.0.4"
actix-service = "2.0.2"
form_urlencoded = "1.2.1"
url = "2.5.0"
actix-multipart = "0.7.2"
actix-web = { version = "4", features = ["rustls"] }
Expand All @@ -32,6 +31,7 @@ liboxen = { path = "../lib" }
log = "0.4.17"
lru = "0.12.0"
os_path = "0.8.0"
percent-encoding = "2.1"
polars = { version = "0.44.2", features = [
"lazy",
"parquet",
Expand Down
14 changes: 8 additions & 6 deletions src/server/src/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use liboxen::util::oxen_version::OxenVersion;

use crate::app_data::OxenAppData;
use crate::errors::OxenHttpError;
use percent_encoding::percent_decode;

pub mod aggregate_query;
pub use aggregate_query::AggregateQuery;
Expand All @@ -28,8 +29,6 @@ pub use df_opts_query::DFOptsQuery;
pub mod tree_depth;
pub use tree_depth::TreeDepthQuery;

use url::form_urlencoded;

pub fn app_data(req: &HttpRequest) -> Result<&OxenAppData, OxenHttpError> {
log::debug!(
"Get user agent from app data (app_data) {:?}",
Expand Down Expand Up @@ -85,6 +84,12 @@ pub fn path_param_to_vec(
Ok(values)
}

fn decode_resource_path(resource_path_str: &str) -> String {
percent_decode(resource_path_str.as_bytes())
.decode_utf8_lossy()
.into_owned()
}

pub fn parse_resource(
req: &HttpRequest,
repo: &LocalRepository,
Expand All @@ -93,10 +98,7 @@ pub fn parse_resource(
let resource_path_str = resource.to_string_lossy();

// Decode the URL, handling both %20 and + as spaces
let decoded_path = form_urlencoded::parse(resource_path_str.as_bytes())
.map(|(key, _)| key.into_owned())
.next()
.unwrap_or_default();
let decoded_path = decode_resource_path(&resource_path_str);

let decoded_resource = PathBuf::from(decoded_path);
log::debug!(
Expand Down

0 comments on commit 3e601e7

Please sign in to comment.