Skip to content

Commit

Permalink
Support for reading and writing files directly to/from ftp
Browse files Browse the repository at this point in the history
  • Loading branch information
RachitSharma2001 committed Sep 17, 2022
1 parent 4c6bc38 commit 4c07b26
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 0 deletions.
94 changes: 94 additions & 0 deletions smart_open/ftp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2019 Radim Rehurek <[email protected]>
#
# This code is distributed under the terms and conditions
# from the MIT License (MIT).
#

"""Implements I/O streams over FTP.
"""

import getpass
import logging
import urllib.parse
import smart_open.utils

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# URL scheme served by this transport.
SCHEME = "ftp"

"""Supported URL schemes."""

# Port used when the URI does not specify one.
DEFAULT_PORT = 21

# Sample URIs illustrating the accepted forms.
URI_EXAMPLES = (
    'ftp://username@host/path/file',
    'ftp://username:password@host/path/file',
    'ftp://username:password@host:port/path/file',
)


def _unquote(text):
return text and urllib.parse.unquote(text)


def parse_uri(uri_as_string):
    """Break an FTP URI into the pieces needed to open a connection.

    :param str uri_as_string: URI such as ``ftp://user:password@host:port/path/file``.
    :returns: dict with the keys ``scheme``, ``uri_path``, ``user``, ``host``,
        ``port`` and ``password``.  ``uri_path``, ``user`` and ``password``
        are percent-decoded; ``port`` falls back to ``DEFAULT_PORT`` when the
        URI does not carry one.
    :raises AssertionError: if the URI scheme is not exactly ``SCHEME``.
    """
    split_uri = urllib.parse.urlsplit(uri_as_string)
    # SCHEME is a plain string, so ``in`` would be a substring test that also
    # lets "", "f", "tp", ... through.  Compare for equality instead.
    assert split_uri.scheme == SCHEME
    return dict(
        scheme=split_uri.scheme,
        uri_path=_unquote(split_uri.path),
        user=_unquote(split_uri.username),
        host=split_uri.hostname,
        port=int(split_uri.port or DEFAULT_PORT),
        password=_unquote(split_uri.password),
    )


def open_uri(uri, mode, transport_params):
    """Open the FTP resource identified by *uri*.

    The URI is parsed, its ``scheme`` entry is discarded, and the remaining
    components are forwarded to :func:`open` together with *mode* and
    *transport_params*.
    """
    # NOTE(review): the return value is discarded; presumably the call is kept
    # for whatever validation/logging check_kwargs performs - confirm.
    smart_open.utils.check_kwargs(open, transport_params)

    connection_kwargs = parse_uri(uri)
    del connection_kwargs['scheme']
    remote_path = connection_kwargs.pop('uri_path')

    return open(
        remote_path,
        mode,
        transport_params=transport_params,
        **connection_kwargs
    )


def convert_transport_params_to_args(transport_params):
    """Pick out the transport parameters that are forwarded to ``FTP_TLS``.

    :param dict transport_params: user-supplied transport parameters.
    :returns: new dict holding only the supported keywords.  Any other keys
        are dropped, and a single warning listing them is logged.
    """
    accepted = ['keyfile', 'certfile', 'context', 'timeout', 'source_address', 'encoding']

    kwargs = {}
    rejected = []
    for name, value in transport_params.items():
        if name in accepted:
            kwargs[name] = value
        else:
            rejected.append(name)

    if rejected:
        logger.warning('ignoring unsupported ftp keyword arguments: %r', rejected)

    return kwargs


def _connect(hostname, username, port, password, transport_params):
    """Open an ``FTP_TLS`` control connection and authenticate on it.

    :param str hostname: server to connect to.
    :param str username: login name sent with ``USER``.
    :param int port: control port of the server.
    :param password: password sent with ``PASS``; ``None`` is sent as an
        empty password.
    :param dict transport_params: extra options; the supported ones are
        forwarded to the ``FTP_TLS`` constructor.
    :returns: the connected ``ftplib.FTP_TLS`` instance.
    :raises Exception: if ``FTP_TLS`` cannot be imported, or if connecting or
        logging in fails.  The underlying error is chained as ``__cause__``.
    """
    # Imported inside the function so the name is resolved at call time, which
    # also lets callers substitute ``ftplib.FTP_TLS`` (e.g. with a mock).
    try:
        from ftplib import FTP_TLS
    except ImportError as err:
        raise Exception("Error: Unable to import FTP_TLS from ftplib library") from err

    kwargs = convert_transport_params_to_args(transport_params)
    ftp = FTP_TLS(**kwargs)

    # A missing password must not go over the wire as the literal text "None".
    passwd = '' if password is None else password

    # NOTE(review): USER/PASS are issued with sendcmd() rather than
    # FTP_TLS.login(); per the ftplib docs it is login()/auth() that secure the
    # control connection, so confirm whether credentials are meant to be sent
    # before the TLS upgrade.
    try:
        ftp.connect(hostname, port)
        ftp.sendcmd(f'USER {username}')
        ftp.sendcmd(f'PASS {passwd}')
    except Exception as err:
        # Release the half-open control connection before reporting failure.
        ftp.close()
        raise Exception(
            "Unable to login to FTP server: either username, password, host, or port is wrong!"
        ) from err
    return ftp


def open(path, mode='r', host=None, user=None, password=None, port=DEFAULT_PORT, transport_params=None):
    """Open a file on an FTP server for reading or writing.

    :param str path: path of the file on the server.
    :param str mode: file mode.  A mode containing ``a`` appends to the remote
        file (``APPE``), a mode containing ``w`` uploads it (``STOR``), and
        any other mode downloads it (``RETR``).
    :param str host: server to connect to; required.
    :param str user: login name; defaults to the current OS user.
    :param str password: login password, if any.
    :param int port: control port of the server.
    :param dict transport_params: extra parameters to pass on to ``FTP_TLS``;
        unsupported ones are ignored with a warning.
    :returns: file object created from the FTP data connection.
    :raises ValueError: if *host* is empty.
    """
    if not host:
        raise ValueError('you must specify the host to connect to')
    if not user:
        user = getpass.getuser()
    if not transport_params:
        transport_params = {}

    # Choose the FTP verb from the requested mode instead of always
    # downloading, so write modes actually upload.
    if 'a' in mode:
        ftp_command = 'APPE'
        # socket.makefile() only accepts r/w/b; locally, appending is a write.
        file_mode = mode.replace('a', 'w')
    elif 'w' in mode:
        ftp_command = 'STOR'
        file_mode = mode
    else:
        ftp_command = 'RETR'
        file_mode = mode

    conn = _connect(host, user, port, password, transport_params)
    try:
        fobj = conn.transfercmd(f"{ftp_command} {path}").makefile(file_mode)
    except Exception:
        # Do not leak the control connection when the transfer cannot start.
        conn.close()
        raise
    # NOTE(review): closing the returned file object does not close ``conn``;
    # confirm who owns the control connection's lifetime.
    return fobj
64 changes: 64 additions & 0 deletions smart_open/tests/test_ftp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-

import logging
import unittest
from unittest.mock import patch, call

import smart_open
from smart_open import ftp


class FTPOpen(unittest.TestCase):
    """Check the FTP commands issued by ``smart_open.open`` against a mocked ``ftplib.FTP_TLS``."""

    @staticmethod
    def _mocked_client(ftp_tls):
        """Return the mocked FTP_TLS instance with a named fake data-connection file."""
        client = ftp_tls.return_value
        client.transfercmd.return_value.makefile.return_value.name = "some/path"
        return client

    @patch("ftplib.FTP_TLS")
    def test_open_just_user(self, ftp_tls):
        client = self._mocked_client(ftp_tls)

        smart_open.open("ftp://user@localhost/dir1/dir2/dir3/file")

        client.connect.assert_called_with("localhost", ftp.DEFAULT_PORT)
        client.sendcmd.assert_has_calls([call("USER user")])
        client.transfercmd.assert_called_with("RETR /dir1/dir2/dir3/file")

    @patch("ftplib.FTP_TLS")
    def test_open_user_pass(self, ftp_tls):
        client = self._mocked_client(ftp_tls)

        smart_open.open("ftp://user:pass@localhost/dir1/dir2/dir3/file")

        client.connect.assert_called_with("localhost", ftp.DEFAULT_PORT)
        client.sendcmd.assert_has_calls([call("USER user"), call("PASS pass")])
        client.transfercmd.assert_called_with("RETR /dir1/dir2/dir3/file")

    @patch("ftplib.FTP_TLS")
    def test_open_user_pass_port(self, ftp_tls):
        client = self._mocked_client(ftp_tls)

        smart_open.open("ftp://user:pass@localhost:29/dir1/dir2/dir3/file")

        client.connect.assert_called_with("localhost", 29)
        client.sendcmd.assert_has_calls([call("USER user"), call("PASS pass")])
        client.transfercmd.assert_called_with("RETR /dir1/dir2/dir3/file")

    @patch("ftplib.FTP_TLS")
    def test_open_with_transport_params(self, ftp_tls):
        self._mocked_client(ftp_tls)
        ftp_options = {
            "keyfile": "some_key_file",
            "certfile": "some_cert",
            "context": "some_context",
            "timeout": 120,
            "source_address": "some_addr",
            "encoding": "some_encoding",
        }

        # Hand over a copy so the expectation below is independent of the call.
        smart_open.open(
            "ftp://user@localhost/dir1/dir2/dir3/file",
            transport_params=dict(ftp_options),
        )

        # Every supported option must reach the FTP_TLS constructor unchanged.
        ftp_tls.assert_called_with(**ftp_options)


if __name__ == "__main__":
    # Running the module directly: log everything, then run the tests.
    log_format = "%(asctime)s : %(levelname)s : %(message)s"
    logging.basicConfig(format=log_format, level=logging.DEBUG)
    unittest.main()
1 change: 1 addition & 0 deletions smart_open/transport.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def get_transport(scheme):
# Each call hands register_transport() the dotted path of a transport submodule.
# NOTE(review): this is a fragment of a longer list; earlier registrations and
# the definition of register_transport() are outside this view.
register_transport('smart_open.http')
register_transport('smart_open.s3')
register_transport('smart_open.ssh')
register_transport('smart_open.ftp')
register_transport('smart_open.webhdfs')

# Sorted tuple of the keys present in _REGISTRY at this point.
SUPPORTED_SCHEMES = tuple(sorted(_REGISTRY.keys()))
Expand Down

0 comments on commit 4c07b26

Please sign in to comment.