Skip to content

Commit

Permalink
Support for reading and writing files directly to/from ftp
Browse files Browse the repository at this point in the history
  • Loading branch information
RachitSharma2001 committed Sep 19, 2022
1 parent 4c6bc38 commit 8a8c990
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 0 deletions.
114 changes: 114 additions & 0 deletions smart_open/ftp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2019 Radim Rehurek <[email protected]>
#
# This code is distributed under the terms and conditions
# from the MIT License (MIT).
#

"""Implements I/O streams over FTP.
"""

import getpass
import logging
import urllib.parse
import smart_open.utils

try:
from ftplib import FTP
except ImportError:
MISSING_DEPS = True

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# URL scheme handled by this transport; parse_uri() checks parsed URIs against it.
SCHEME = "ftp"

"""Supported URL schemes."""

# Port used when the URI (or the caller of open()) does not specify one.
DEFAULT_PORT = 21

# Example URIs showing the accepted forms: user only, user with password,
# and user with password plus an explicit port.
URI_EXAMPLES = (
"ftp://username@host/path/file",
"ftp://username:password@host/path/file",
"ftp://username:password@host:port/path/file",
)


def _unquote(text):
return text and urllib.parse.unquote(text)


def parse_uri(uri_as_string):
    """Split an FTP URI into the pieces needed to open a connection.

    :param str uri_as_string: a URI such as ``ftp://user:password@host:port/path``.
    :returns: a dict with the keys ``scheme``, ``uri_path``, ``user``, ``host``,
        ``port`` and ``password``.  The path, user and password are
        percent-decoded; ``port`` falls back to ``DEFAULT_PORT`` when the URI
        does not carry one.
    :raises AssertionError: if the URI scheme is not ``SCHEME``.
    """
    split_uri = urllib.parse.urlsplit(uri_as_string)
    # SCHEME is a plain string, so ``in`` would be a substring test that also
    # lets "", "f" or "tp" through; compare for equality instead.
    assert split_uri.scheme == SCHEME
    return dict(
        scheme=split_uri.scheme,
        uri_path=_unquote(split_uri.path),
        user=_unquote(split_uri.username),
        host=split_uri.hostname,
        port=int(split_uri.port or DEFAULT_PORT),
        password=_unquote(split_uri.password),
    )


def open_uri(uri, mode, transport_params):
    """Open the file addressed by an FTP URI.

    The URI is parsed with ``parse_uri``; its path becomes the remote path and
    the remaining components (user, host, port, password) are forwarded to
    ``open`` as keyword arguments together with ``transport_params``.
    """
    smart_open.utils.check_kwargs(open, transport_params)
    connection_details = parse_uri(uri)
    remote_path = connection_details.pop("uri_path")
    # The scheme is not a parameter of open(), so drop it before forwarding.
    del connection_details["scheme"]
    return open(
        remote_path,
        mode,
        transport_params=transport_params,
        **connection_details,
    )


def convert_transport_params_to_args(transport_params):
    """Select the transport parameters that are forwarded to the FTP constructor.

    Recognised keys are copied into a new dict, preserving their order; all
    other keys are dropped and reported in a single warning.

    NOTE(review): ``keyfile``, ``certfile`` and ``context`` look like
    ``ftplib.FTP_TLS`` options -- confirm the plain ``FTP`` class accepts them.

    :param dict transport_params: user-supplied transport parameters.
    :returns: a dict containing only the supported keyword arguments.
    """
    accepted = (
        "keyfile",
        "certfile",
        "context",
        "timeout",
        "source_address",
        "encoding",
    )
    kwargs = {}
    ignored = []
    for name, value in transport_params.items():
        if name in accepted:
            kwargs[name] = value
        else:
            ignored.append(name)

    if ignored:
        logger.warning(
            "ignoring unsupported ftp keyword arguments: %r", ignored
        )

    return kwargs


def _connect(hostname, username, port, password, transport_params):
    """Open an FTP control connection and authenticate on it.

    :param str hostname: server to connect to.
    :param str username: sent with the ``USER`` command.
    :param int port: server port.
    :param password: sent with the ``PASS`` command; ``None`` is sent as an
        empty password.
    :param dict transport_params: filtered through
        ``convert_transport_params_to_args`` and passed to the ``FTP``
        constructor.
    :returns: the connected, logged-in ``FTP`` object.
    :raises Exception: if connecting or logging in fails; the original error
        is attached as ``__cause__``.
    """
    kwargs = convert_transport_params_to_args(transport_params)
    ftp = FTP(**kwargs)
    # A missing password must not go over the wire as the literal text "None".
    secret = "" if password is None else password
    try:
        ftp.connect(hostname, port)
        ftp.sendcmd(f"USER {username}")
        ftp.sendcmd(f"PASS {secret}")
    except Exception as err:
        # Release the control socket instead of leaking it, and keep the
        # underlying error reachable for debugging.
        ftp.close()
        raise Exception(
            "Unable to login to FTP server: either username, password, host, or port is wrong!"
        ) from err
    return ftp


# transport parameters can include any extra parameters that you want to be passed into FTP
def open(
    path,
    mode="r",
    host=None,
    user=None,
    password=None,
    port=DEFAULT_PORT,
    transport_params=None,
):
    """Open a remote file over FTP and return a file object for its data stream.

    The FTP command follows ``mode``: modes containing ``a`` append to the
    remote file (``APPE``), write-only modes store it (``STOR``), and all
    other modes retrieve it (``RETR``).  Binary modes switch the server to
    image type first so the bytes are transferred unmodified.

    :param str path: path of the file on the server.
    :param str mode: file mode, e.g. ``"r"``, ``"rb"``, ``"wb"`` or ``"ab"``.
    :param str host: server to connect to (required).
    :param str user: login name; defaults to the current local user.
    :param str password: login password.
    :param int port: server port.
    :param dict transport_params: extra keyword arguments for the ``FTP``
        constructor (see ``convert_transport_params_to_args``).
    :returns: a file object; closing it also closes the data socket and the
        control connection.
    :raises ValueError: if ``host`` is missing.
    """
    if not host:
        raise ValueError("you must specify the host to connect to")
    if not user:
        user = getpass.getuser()
    if not transport_params:
        transport_params = {}

    # Choose the FTP command from the mode.  socket.makefile() only knows
    # "r", "w" and "b", so an append stream is opened as a write stream.
    if "a" in mode:
        command, file_mode = "APPE", mode.replace("a", "w")
    elif "w" in mode and "r" not in mode:
        command, file_mode = "STOR", mode
    else:
        command, file_mode = "RETR", mode

    conn = _connect(host, user, port, password, transport_params)
    try:
        if "b" in mode:
            # Without this the server stays in its default ASCII type and may
            # rewrite line endings in binary payloads.
            conn.voidcmd("TYPE I")
        data_socket = conn.transfercmd(f"{command} {path}")
        try:
            fobj = data_socket.makefile(file_mode)
        except Exception:
            data_socket.close()
            raise
    except Exception:
        # Nothing is handed to the caller, so release the connection here.
        conn.close()
        raise

    stream_close = fobj.close

    def _close_everything():
        # Closing only the file object would leave the data socket and the
        # control connection open; tear all three down together.
        try:
            stream_close()
        finally:
            try:
                data_socket.close()
            finally:
                conn.close()

    fobj.close = _close_everything
    return fobj
64 changes: 64 additions & 0 deletions smart_open/tests/test_ftp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-

import logging
import unittest
from unittest.mock import patch, call

import smart_open
from smart_open import ftp


class FTPOpen(unittest.TestCase):
    """Check which calls ``smart_open.open`` makes on a mocked ``FTP`` class."""

    @staticmethod
    def _mock_connection(mock_ftp_cls):
        """Give the mocked data stream a name and return the mocked FTP instance."""
        connection = mock_ftp_cls.return_value
        connection.transfercmd.return_value.makefile.return_value.name = "some/path"
        return connection

    @patch("smart_open.ftp.FTP")
    def test_open_just_user(self, mock_ftp_cls):
        connection = self._mock_connection(mock_ftp_cls)
        smart_open.open("ftp://user@localhost/dir1/dir2/dir3/file")
        connection.connect.assert_called_with("localhost", ftp.DEFAULT_PORT)
        connection.sendcmd.assert_has_calls([call("USER user")])
        connection.transfercmd.assert_called_with("RETR /dir1/dir2/dir3/file")

    @patch("smart_open.ftp.FTP")
    def test_open_user_pass(self, mock_ftp_cls):
        connection = self._mock_connection(mock_ftp_cls)
        smart_open.open("ftp://user:pass@localhost/dir1/dir2/dir3/file")
        connection.connect.assert_called_with("localhost", ftp.DEFAULT_PORT)
        connection.sendcmd.assert_has_calls(
            [call("USER user"), call("PASS pass")]
        )
        connection.transfercmd.assert_called_with("RETR /dir1/dir2/dir3/file")

    @patch("smart_open.ftp.FTP")
    def test_open_user_pass_port(self, mock_ftp_cls):
        connection = self._mock_connection(mock_ftp_cls)
        smart_open.open("ftp://user:pass@localhost:29/dir1/dir2/dir3/file")
        connection.connect.assert_called_with("localhost", 29)
        connection.sendcmd.assert_has_calls(
            [call("USER user"), call("PASS pass")]
        )
        connection.transfercmd.assert_called_with("RETR /dir1/dir2/dir3/file")

    @patch("smart_open.ftp.FTP")
    def test_open_with_transport_params(self, mock_ftp_cls):
        self._mock_connection(mock_ftp_cls)
        # Every supported keyword must reach the FTP constructor unchanged.
        ftp_kwargs = {
            "keyfile": "some_key_file",
            "certfile": "some_cert",
            "context": "some_context",
            "timeout": 120,
            "source_address": "some_addr",
            "encoding": "some_encoding",
        }
        smart_open.open(
            "ftp://user@localhost/dir1/dir2/dir3/file",
            transport_params=dict(ftp_kwargs),
        )
        mock_ftp_cls.assert_called_with(**ftp_kwargs)


# Run the tests with debug-level logging when this file is executed directly.
if __name__ == "__main__":
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s : %(levelname)s : %(message)s",
    )
    unittest.main()
1 change: 1 addition & 0 deletions smart_open/transport.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def get_transport(scheme):
register_transport('smart_open.http')
register_transport('smart_open.s3')
register_transport('smart_open.ssh')
register_transport('smart_open.ftp')
register_transport('smart_open.webhdfs')

SUPPORTED_SCHEMES = tuple(sorted(_REGISTRY.keys()))
Expand Down

0 comments on commit 8a8c990

Please sign in to comment.