Skip to content

Commit

Permalink
added dest_dir argument to download_file method (#32)
Browse files Browse the repository at this point in the history
  • Loading branch information
dariobauer authored Feb 20, 2022
1 parent fa32edd commit d36aedd
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 16 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Unreleased

* Add destination argument to download_file method (Issue [#30](https://github.com/dariobauer/graph-onedrive/issues/30))
* Fixed bug in download_file method when verbose was set to false (Issue [#29](https://github.com/dariobauer/graph-onedrive/issues/29))

## Released
Expand Down
9 changes: 5 additions & 4 deletions docs/DOCUMENTATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -676,10 +676,10 @@ Returns:
Downloads a file to the current working directory asynchronously with multiple concurrent http requests file files larger than 1mb.
Note folders cannot be downloaded, you need to implement a loop instead.

Future development improvement: specify the file location and name.

```python
file_name = my_instance.download_file(item_id, max_connections=8, verbose=False)
file_path = my_instance.download_file(
item_id, max_connections=8, dest_dir=None, verbose=False
)
```

Positional arguments:
Expand All @@ -689,11 +689,12 @@ Positional arguments:
Keyword arguments:

* max_connections (int) -- max concurrent open http requests, refer throttling warning in the gotcha section at the bottom of the docs
* dest_dir (str | Path) -- destination directory for the downloaded file, default is current working directory (default = None)
* verbose (bool) -- prints the download progress (default = False)

Returns:

* file_name (str) -- returns the name of the file including extension
* file_path (Path) -- returns the path to the downloaded file including extension

#### upload_file

Expand Down
2 changes: 1 addition & 1 deletion src/graph_onedrive/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.3.1dev1"
__version__ = "0.3.1dev2"

import logging

Expand Down
42 changes: 31 additions & 11 deletions src/graph_onedrive/_onedrive.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pathlib import Path
from time import sleep
from typing import Any
from typing import Optional

import aiofiles
import httpx
Expand Down Expand Up @@ -1173,17 +1174,22 @@ def delete_item(self, item_id: str, pre_confirm: bool = False) -> bool:

@token_required
def download_file(
self, item_id: str, max_connections: int = 8, verbose: bool = False
self,
item_id: str,
max_connections: int = 8,
dest_dir: str | Path | None = None,
verbose: bool = False,
) -> str:
"""Downloads a file to the current working directory asynchronously.
Note folders cannot be downloaded, you need to use a loop instead.
Positional arguments:
item_id (str) -- item id of the file to be deleted
Keyword arguments:
max_connections (int) -- max concurrent open http requests, refer Docs regarding throttling limits
dest_dir (str | Path) -- destination directory for the downloaded file, default is current working directory (default = None)
verbose (bool) -- prints status message during the download process (default = False)
Returns:
file_name (str) -- returns the name of the file including extension
file_path (Path) -- returns the path of the file including extension
"""
# Validate item_id
if not isinstance(item_id, str):
Expand All @@ -1193,6 +1199,17 @@ def download_file(
raise TypeError(
f"max_connections expected 'int', got {type(max_connections).__name__!r}"
)
# Validate dest_dir
if dest_dir is None:
dest_dir = Path.cwd()
elif not isinstance(dest_dir, str) and not isinstance(dest_dir, Path):
raise TypeError(
f"dest_dir expected 'str' or 'Path', got {type(dest_dir).__name__!r}"
)
dest_dir = Path(dest_dir)
if dest_dir.is_dir() is False:
raise ValueError(f"dest_dir {dest_dir} is not a directory")
# Check max connections is not excessive
if max_connections > 16:
warnings.warn(
f"max_connections={max_connections} could result in throttling and enforced cool-down period",
Expand All @@ -1204,10 +1221,11 @@ def download_file(
if "folder" in file_details:
raise ValueError("item_id provided is for a folder, expected file item id")
file_name = file_details["name"]
file_path = dest_dir / file_name
size = file_details["size"]
# If the file is empty, just create it and return
if size == 0:
Path(file_name).touch()
file_path.touch()
logger.warning(f"downloaded file size=0, empty file '{file_name}' created.")
return file_name
# Create request url based on input item id to be downloaded
Expand All @@ -1223,32 +1241,32 @@ def download_file(
# Download the file asynchronously
asyncio.run(
self._download_async(
download_url, file_name, size, max_connections, verbose
download_url, file_path, size, max_connections, verbose
)
)
# Return the file name
return file_name
return file_path

async def _download_async(
self,
download_url: str,
file_name: str,
file_path: Path,
file_size: int,
max_connections: int = 8,
verbose: bool = False,
) -> None:
"""INTERNAL: Creates a list of co-routines each downloading one part of the file, and starts them.
Positional arguments:
download_url (str) -- url of the file to download
file_name (str) -- name of the final file
file_path (Path) -- path of the final file
file_size (int) -- size of the file being downloaded
Keyword arguments:
max_connections (int) -- max concurrent open http requests
verbose (bool) -- prints status message during the download process (default = False)
"""
# Assert rather then check as this is an internal method
assert isinstance(download_url, str)
assert isinstance(file_name, str)
assert isinstance(file_path, Path)
assert isinstance(file_size, int)
assert isinstance(max_connections, int)
tasks = list()
Expand All @@ -1270,14 +1288,16 @@ async def _download_async(
if verbose:
pretty_size = round(file_size / 1000000, 1)
print(
f"File {file_name} ({pretty_size}mb) will be downloaded in {num_coroutines} segments."
f"File {file_path.name} ({pretty_size}mb) will be downloaded in {num_coroutines} segments."
)
logger.debug(
f"file_size={file_size}B, min_typ_chunk_size={min_typ_chunk_size}B, num_coroutines={num_coroutines}, typ_chunk_size={typ_chunk_size}"
)
for i in range(num_coroutines):
# Get the file part Path, placed in the temp directory
part_file_path = Path(tmp_dir).joinpath(file_name + "." + str(i + 1))
part_file_path = Path(tmp_dir).joinpath(
file_path.name + "." + str(i + 1)
)
# We save the file part Path for later use
file_part_names.append(part_file_path)
# On first iteration will be 0
Expand All @@ -1303,7 +1323,7 @@ async def _download_async(
# Join the downloaded file parts
if verbose:
print("Joining individual segments into single file")
with open(file_name, "wb") as fw:
with open(file_path, "wb") as fw:
for file_part in file_part_names:
with open(file_part, "rb") as fr:
shutil.copyfileobj(fr, fw)
Expand Down

0 comments on commit d36aedd

Please sign in to comment.