From 4c07b264ad50fe3f11cf3691a2fbf5ef3594803c Mon Sep 17 00:00:00 2001
From: RachitSharma2001
Date: Fri, 16 Sep 2022 12:07:15 -0700
Subject: [PATCH] Support for reading and writing files directly to/from ftp

---
 smart_open/ftp.py            | 182 +++++++++++++++++++++++++++++++++++
 smart_open/tests/test_ftp.py |  77 +++++++++++++++
 smart_open/transport.py      |   1 +
 3 files changed, 260 insertions(+)
 create mode 100644 smart_open/ftp.py
 create mode 100644 smart_open/tests/test_ftp.py

diff --git a/smart_open/ftp.py b/smart_open/ftp.py
new file mode 100644
index 00000000..1b46ba9d
--- /dev/null
+++ b/smart_open/ftp.py
@@ -0,0 +1,182 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2019 Radim Rehurek
+#
+# This code is distributed under the terms and conditions
+# from the MIT License (MIT).
+#
+
+"""Implements I/O streams over FTP.
+"""
+
+import getpass
+import logging
+import urllib.parse
+import smart_open.utils
+
+logger = logging.getLogger(__name__)
+
+SCHEME = "ftp"
+
+"""Supported URL scheme."""
+
+DEFAULT_PORT = 21
+
+URI_EXAMPLES = (
+    'ftp://username@host/path/file',
+    'ftp://username:password@host/path/file',
+    'ftp://username:password@host:port/path/file'
+)
+
+# Maps the read/write part of a file mode to the FTP command that starts the transfer.
+_MODE_TO_FTP_COMMAND = {'r': 'RETR', 'w': 'STOR', 'a': 'APPE'}
+
+
+def _unquote(text):
+    """Percent-decode text, passing None and empty strings through unchanged."""
+    return text and urllib.parse.unquote(text)
+
+
+def parse_uri(uri_as_string):
+    """Split an FTP URI into its scheme, path, user, host, port and password.
+
+    The path, user name and password are percent-decoded; the port falls
+    back to DEFAULT_PORT when the URI does not specify one.
+    """
+    split_uri = urllib.parse.urlsplit(uri_as_string)
+    # SCHEME is a plain string, so "in" would also accept substrings such as "ft".
+    assert split_uri.scheme == SCHEME
+    return dict(
+        scheme=split_uri.scheme,
+        uri_path=_unquote(split_uri.path),
+        user=_unquote(split_uri.username),
+        host=split_uri.hostname,
+        port=int(split_uri.port or DEFAULT_PORT),
+        password=_unquote(split_uri.password),
+    )
+
+
+def open_uri(uri, mode, transport_params):
+    """Open the file that an FTP URI points to; see open() for the details."""
+    smart_open.utils.check_kwargs(open, transport_params)
+    parsed_uri = parse_uri(uri)
+    uri_path = parsed_uri.pop('uri_path')
+    parsed_uri.pop('scheme')
+    return open(uri_path, mode, transport_params=transport_params, **parsed_uri)
+
+
+def convert_transport_params_to_args(transport_params):
+    """Return the transport parameters that the FTP_TLS constructor accepts.
+
+    Any other key is dropped, and a warning listing the dropped keys is logged.
+    """
+    supported_keywords = ['keyfile', 'certfile', 'context', 'timeout', 'source_address', 'encoding']
+    unsupported_keywords = [k for k in transport_params if k not in supported_keywords]
+    kwargs = {k: v for (k, v) in transport_params.items() if k in supported_keywords}
+
+    if unsupported_keywords:
+        logger.warning('ignoring unsupported ftp keyword arguments: %r', unsupported_keywords)
+
+    return kwargs
+
+
+def _connect(hostname, username, port, password, transport_params):
+    """Connect to the FTP server, log in and return the control connection.
+
+    Raises Exception when FTP_TLS is unavailable, or when connecting or
+    logging in fails.
+    """
+    try:
+        # ftplib only defines FTP_TLS when Python was built with SSL support.
+        from ftplib import FTP_TLS
+    except ImportError as err:
+        raise Exception("Error: Unable to import FTP_TLS from ftplib library") from err
+    kwargs = convert_transport_params_to_args(transport_params)
+    ftp = FTP_TLS(**kwargs)
+    # A URI without a password parses to None; send an empty password in that
+    # case, as ftplib's own login() does, rather than the literal text "None".
+    if password is None:
+        password = ''
+    try:
+        ftp.connect(hostname, port)
+        # NOTE(review): AUTH TLS is never negotiated, so these credentials
+        # travel in clear text even though FTP_TLS is used -- confirm intent.
+        ftp.sendcmd(f'USER {username}')
+        ftp.sendcmd(f'PASS {password}')
+    except Exception as err:
+        # Do not leak the control socket when the connection cannot be used.
+        ftp.close()
+        raise Exception(
+            "Unable to login to FTP server: either username, password, host, or port is wrong!"
+        ) from err
+    return ftp
+
+
+def _close_connection_with(fobj, data_socket, conn):
+    """Make closing fobj also close its data socket and control connection.
+
+    The returned file object is the only thing that keeps both of them
+    alive, and nothing else ever closes them.
+    """
+    close_file = fobj.close
+
+    def close():
+        try:
+            close_file()
+        finally:
+            data_socket.close()
+            conn.close()
+
+    fobj.close = close
+    return fobj
+
+
+def open(path, mode='r', host=None, user=None, password=None, port=DEFAULT_PORT, transport_params=None):
+    """Open a file on an FTP server for reading or writing.
+
+    Parameters
+    ----------
+    path: str
+        The path of the file on the server.
+    mode: str, optional
+        'r' to read, 'w' to overwrite or 'a' to append; add 'b' for binary.
+    host: str
+        The host to connect to.
+    user: str, optional
+        The user name; defaults to the name of the current local user.
+    password: str, optional
+        The password for that user.
+    port: int, optional
+        The port to connect to.
+    transport_params: dict, optional
+        Extra keyword arguments to pass to the FTP_TLS constructor.
+
+    Returns
+    -------
+    A file-like object.  Closing it also closes the FTP connection.
+
+    Raises
+    ------
+    ValueError
+        If the host is missing or the mode is not supported.
+    """
+    if not host:
+        raise ValueError('you must specify the host to connect to')
+    ftp_command = _MODE_TO_FTP_COMMAND.get(mode.replace('b', ''))
+    if ftp_command is None:
+        raise ValueError(f'unsupported mode: {mode!r}')
+    if not user:
+        user = getpass.getuser()
+    if not transport_params:
+        transport_params = {}
+    conn = _connect(host, user, port, password, transport_params)
+    try:
+        # Servers start out in ASCII mode, which may rewrite line endings.
+        conn.voidcmd('TYPE I' if 'b' in mode else 'TYPE A')
+        data_socket = conn.transfercmd(f"{ftp_command} {path}")
+    except Exception:
+        conn.close()
+        raise
+    # socket.makefile() has no append mode; APPE already makes the server append.
+    fobj = data_socket.makefile(mode.replace('a', 'w'))
+    return _close_connection_with(fobj, data_socket, conn)
diff --git a/smart_open/tests/test_ftp.py b/smart_open/tests/test_ftp.py
new file mode 100644
index 00000000..ea93ee50
--- /dev/null
+++ b/smart_open/tests/test_ftp.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+
+import logging
+import unittest
+from unittest.mock import patch, call
+
+import smart_open
+from smart_open import ftp
+
+
+class FTPOpen(unittest.TestCase):
+    @patch("ftplib.FTP_TLS")
+    def test_open_just_user(self, ftp_tls):
+        ftp_tls().transfercmd().makefile().name = "some/path"
+        smart_open.open("ftp://user@localhost/dir1/dir2/dir3/file")
+        ftp_tls().connect.assert_called_with("localhost", ftp.DEFAULT_PORT)
+        ftp_tls().sendcmd.assert_has_calls([call("USER user")])
+        ftp_tls().transfercmd.assert_called_with("RETR /dir1/dir2/dir3/file")
+
+    @patch("ftplib.FTP_TLS")
+    def test_open_user_pass(self, ftp_tls):
+        ftp_tls().transfercmd().makefile().name = "some/path"
+        smart_open.open("ftp://user:pass@localhost/dir1/dir2/dir3/file")
+        ftp_tls().connect.assert_called_with("localhost", ftp.DEFAULT_PORT)
+        ftp_tls().sendcmd.assert_has_calls([call("USER user"), call("PASS pass")])
+        ftp_tls().transfercmd.assert_called_with("RETR /dir1/dir2/dir3/file")
+
+    @patch("ftplib.FTP_TLS")
+    def test_open_user_pass_port(self, ftp_tls):
+        ftp_tls().transfercmd().makefile().name = "some/path"
+        smart_open.open("ftp://user:pass@localhost:29/dir1/dir2/dir3/file")
+        ftp_tls().connect.assert_called_with("localhost", 29)
+        ftp_tls().sendcmd.assert_has_calls([call("USER user"), call("PASS pass")])
+        ftp_tls().transfercmd.assert_called_with("RETR /dir1/dir2/dir3/file")
+
+    @patch("ftplib.FTP_TLS")
+    def test_open_with_transport_params(self, ftp_tls):
+        ftp_tls().transfercmd().makefile().name = "some/path"
+        smart_open.open(
+            "ftp://user@localhost/dir1/dir2/dir3/file",
+            transport_params={
+                "keyfile": "some_key_file",
+                "certfile": "some_cert",
+                "context": "some_context",
+                "timeout": 120,
+                "source_address": "some_addr",
+                "encoding": "some_encoding",
+            },
+        )
+        ftp_tls.assert_called_with(
+            keyfile="some_key_file",
+            certfile="some_cert",
+            context="some_context",
+            timeout=120,
+            source_address="some_addr",
+            encoding="some_encoding",
+        )
+
+    @patch("ftplib.FTP_TLS")
+    def test_open_for_writing(self, ftp_tls):
+        ftp.open("/dir1/file", mode="wb", host="localhost", user="user", password="pass")
+        ftp_tls().transfercmd.assert_called_with("STOR /dir1/file")
+        ftp_tls().transfercmd().makefile.assert_called_with("wb")
+
+    @patch("ftplib.FTP_TLS")
+    def test_close_also_closes_connection(self, ftp_tls):
+        fobj = ftp.open("/dir1/file", mode="rb", host="localhost", user="user", password="pass")
+        fobj.close()
+        ftp_tls().transfercmd().close.assert_called_once_with()
+        ftp_tls().close.assert_called_once_with()
+
+
+if __name__ == "__main__":
+    logging.basicConfig(
+        format="%(asctime)s : %(levelname)s : %(message)s", level=logging.DEBUG
+    )
+    unittest.main()
diff --git a/smart_open/transport.py b/smart_open/transport.py
index 00fb27d7..abedcac3 100644
--- a/smart_open/transport.py
+++ b/smart_open/transport.py
@@ -100,6 +100,7 @@ def get_transport(scheme):
 register_transport('smart_open.http')
 register_transport('smart_open.s3')
 register_transport('smart_open.ssh')
+register_transport('smart_open.ftp')
 register_transport('smart_open.webhdfs')
 
 SUPPORTED_SCHEMES = tuple(sorted(_REGISTRY.keys()))