Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Google Sheets connector #202

Merged
merged 13 commits into from
Sep 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions doc/connectors/google_sheets_2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# This is the doc for the Google Sheets 2 connector

## Data provider configuration

* `type`: `"GoogleSheets2"`
* `name`: str, required
* `auth_flow`: str
* `baseroute`: str
* `secrets`: dict

The `auth_flow` property marks this connector as one that relies on the connector_oauth_manager to perform the OAuth dance.

The `baseroute` is fixed and is 'https://sheets.googleapis.com/v4/spreadsheets/'.

The `secrets` dictionary contains the `access_token` and a `refresh_token` (if there is one). `secrets` is optional when the connector is first created, but it is required before any request can be made through the connector. If there is no `access_token`, an exception is raised.


```coffee
DATA_PROVIDERS: [
type: 'GoogleSheets2'
name: '<name>'
,
...
]
```

## Data source configuration

* `domain`: str, required
* `name`: str, required. Should match the data provider name
* `spreadsheet_id`: str, required. Id of the spreadsheet, which can be found in
its url: https://docs.google.com/spreadsheets/d/<spreadsheet_id_is_here>/edit?pref=2&pli=1#gid=0
* `sheet`: str. By default, the extractor returns the first sheet.
* `header_row`: int, defaults to 0. Index (0-based) of the row that holds the header of the spreadsheet


```coffee
DATA_SOURCES: [
domain: '<domain>'
name: '<name>'
spreadsheet_id: '<spreadsheet_id>'
sheet: '<sheet name>'
header_row: <header_row>
,
...
]
```
207 changes: 207 additions & 0 deletions tests/google_sheets_2/test_google_sheets_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
from unittest.mock import Mock

import pytest
from pytest import fixture

import tests.general_helpers as helpers
from toucan_connectors.google_sheets_2.google_sheets_2_connector import (
GoogleSheets2Connector,
GoogleSheets2DataSource,
)

# Dotted path of the connector module; used to build patch targets such as f'{import_path}.fetch'.
import_path = 'toucan_connectors.google_sheets_2.google_sheets_2_connector'


@fixture
def con():
    """Provide a GoogleSheets2 connector built with only a name (no secrets are set here)."""
    connector = GoogleSheets2Connector(name='test_name')
    return connector


@fixture
def con_with_secrets(con):
    """Provide the `con` connector after secrets holding an access token were set on it."""
    secrets = {'access_token': 'foo', 'refresh_token': None}
    con.set_secrets(secrets)
    return con


@fixture
def ds():
    """Provide a data source that names the 'Constants' sheet of the test spreadsheet."""
    settings = {
        'name': 'test_name',
        'domain': 'test_domain',
        'sheet': 'Constants',
        'spreadsheet_id': '1SMnhnmBm-Tup3SfhS03McCf6S4pS2xqjI6CAXSSBpHU',
    }
    return GoogleSheets2DataSource(**settings)


@fixture
def ds_without_sheet():
    """Provide a data source for the test spreadsheet that does not name any sheet."""
    settings = {
        'name': 'test_name',
        'domain': 'test_domain',
        'spreadsheet_id': '1SMnhnmBm-Tup3SfhS03McCf6S4pS2xqjI6CAXSSBpHU',
    }
    return GoogleSheets2DataSource(**settings)


# Canned sheet payload used as the mocked fetch result: one header row followed by two data rows.
FAKE_SHEET = dict(
    metadata='...',
    values=[
        ['country', 'city'],
        ['France', 'Paris'],
        ['England', 'London'],
    ],
)


@pytest.mark.asyncio
async def test_get_data(mocker, con):
    """It should hand back whatever the patched `fetch` resolves to."""
    fetch_target = f'{import_path}.fetch'
    mocker.patch(fetch_target, return_value=helpers.build_future(FAKE_SHEET))

    fetched = await con._get_data('/foo', 'myaccesstoken')

    assert fetched == FAKE_SHEET


# Canned spreadsheet payload listing three sheets, each described only by its title.
FAKE_SHEET_LIST_RESPONSE = {
    'sheets': [{'properties': {'title': title}} for title in ('Foo', 'Bar', 'Baz')],
}


def get_columns_in_schema(schema):
    """Return the `enum` values listed for `sheet` in a schema, or None if absent.

    Pydantic generates schema slightly differently in python <=3.7 and in python 3.8,
    so the enum is read from `definitions` when that entry is present and non-empty,
    and from `properties` otherwise.
    """
    section = 'definitions' if schema.get('definitions') else 'properties'
    try:
        return schema[section]['sheet']['enum']
    except KeyError:
        # Any missing level (section, 'sheet' or 'enum') means no enum is available.
        return None


def test_get_form_with_secrets(mocker, con_with_secrets, ds):
    """It should expose the titles of the available sheets in the form schema."""
    mocker.patch.object(GoogleSheets2Connector, '_run_fetch', return_value=FAKE_SHEET_LIST_RESPONSE)
    config = {'spreadsheet_id': '1SMnhnmBm-Tup3SfhS03McCf6S4pS2xqjI6CAXSSBpHU'}

    form_schema = ds.get_form(connector=con_with_secrets, current_config=config)

    expected_titles = ['Foo', 'Bar', 'Baz']
    assert get_columns_in_schema(form_schema) == expected_titles


def test_get_form_no_secrets(mocker, con, ds):
    """It should expose no sheet titles when the connector has no secrets."""
    # NOTE(review): return_value=Exception makes the mock *return* the Exception class
    # rather than raise it; side_effect=Exception may have been intended — confirm.
    mocker.patch.object(GoogleSheets2Connector, '_run_fetch', return_value=Exception)
    config = {'spreadsheet_id': '1SMnhnmBm-Tup3SfhS03McCf6S4pS2xqjI6CAXSSBpHU'}

    form_schema = ds.get_form(connector=con, current_config=config)

    assert not get_columns_in_schema(form_schema)


def test_set_secrets(mocker, con):
    """It should store the given secrets on the connector."""
    set_secrets_spy = mocker.spy(GoogleSheets2Connector, 'set_secrets')
    fake_secrets = dict(access_token='myaccesstoken', refresh_token=None)

    con.set_secrets(fake_secrets)

    assert con.secrets == fake_secrets
    # The spy wraps the class attribute, so the instance shows up as the first argument.
    set_secrets_spy.assert_called_once_with(con, fake_secrets)


def test_spreadsheet_success(mocker, con_with_secrets, ds):
    """It should build a dataframe from the sheet, honouring `header_row`."""
    mocker.patch.object(GoogleSheets2Connector, '_run_fetch', return_value=FAKE_SHEET)

    # Default header row: the first row supplies the column names.
    default_header_df = con_with_secrets.get_df(ds)
    assert default_header_df.shape == (2, 2)
    assert default_header_df.columns.tolist() == ['country', 'city']

    # Header on the second row: only one data row is left below it.
    ds.header_row = 1
    shifted_header_df = con_with_secrets.get_df(ds)
    assert shifted_header_df.shape == (1, 2)
    assert shifted_header_df.columns.tolist() == ['France', 'Paris']


def test_spreadsheet_no_secrets(mocker, con, ds):
    """It should raise an exception if there are no secrets or no access token."""
    mocker.patch.object(GoogleSheets2Connector, '_run_fetch', return_value=FAKE_SHEET)

    # Case 1: no secrets were ever set on the connector.
    with pytest.raises(Exception) as raised:
        con.get_df(ds)

    assert str(raised.value) == 'No credentials'

    # Case 2: secrets exist but hold no 'access_token' entry.
    secrets_without_token = {'refresh_token': None}
    con.set_secrets(secrets_without_token)

    with pytest.raises(KeyError):
        con.get_df(ds)


def test_set_columns(mocker, con_with_secrets, ds):
    """It should produce a well-formed column set when header cells are empty."""
    sheet_with_blank_headers = {
        'metadata': '...',
        'values': [['Animateur', '', '', 'Week'], ['pika', '', 'a', 'W1'], ['bulbi', '', '', 'W2']],
    }
    mocker.patch.object(
        GoogleSheets2Connector, '_run_fetch', return_value=sheet_with_blank_headers
    )

    # The blank header cells are expected to come back under their positions (1 and 2).
    expected = {
        'Animateur': {1: 'pika', 2: 'bulbi'},
        1: {1: '', 2: ''},
        2: {1: 'a', 2: ''},
        'Week': {1: 'W1', 2: 'W2'},
    }

    df = con_with_secrets.get_df(ds)
    assert df.to_dict() == expected


def test__run_fetch(mocker, con):
    """It should return the value that the patched `_get_data` future resolves to."""
    pending_sheet = helpers.build_future(FAKE_SHEET)
    mocker.patch.object(GoogleSheets2Connector, '_get_data', return_value=pending_sheet)

    fetched = con._run_fetch('/fudge', 'myaccesstoken')

    assert fetched == FAKE_SHEET


def test_spreadsheet_without_sheet(mocker, con_with_secrets, ds_without_sheet):
    """
    It should fall back to the first sheet of the spreadsheet when no sheet has been indicated
    """
    spreadsheet_url = 'https://sheets.googleapis.com/v4/spreadsheets/1SMnhnmBm-Tup3SfhS03McCf6S4pS2xqjI6CAXSSBpHU'
    first_sheet_url = 'https://sheets.googleapis.com/v4/spreadsheets/1SMnhnmBm-Tup3SfhS03McCf6S4pS2xqjI6CAXSSBpHU/values/Foo'

    def mock_api_responses(uri: str, _token):
        # URLs ending in '/Foo' get the sheet values; anything else gets the sheet listing.
        return FAKE_SHEET if uri.endswith('/Foo') else FAKE_SHEET_LIST_RESPONSE

    fetch_mock: Mock = mocker.patch.object(
        GoogleSheets2Connector, '_run_fetch', side_effect=mock_api_responses
    )
    df = con_with_secrets.get_df(ds_without_sheet)

    # Two requests are expected: the spreadsheet itself, then the values of sheet 'Foo'.
    assert fetch_mock.call_count == 2
    listing_call, values_call = fetch_mock.call_args_list[0], fetch_mock.call_args_list[1]
    assert listing_call[0][0] == spreadsheet_url
    assert values_call[0][0] == first_sheet_url

    assert df.shape == (2, 2)
    assert df.columns.tolist() == ['country', 'city']
40 changes: 40 additions & 0 deletions tests/test_common.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import pytest
from aiohttp import web

from toucan_connectors.common import (
NonValidVariable,
apply_query_parameters,
fetch,
nosql_apply_parameters_to_query,
)

Expand Down Expand Up @@ -183,3 +185,41 @@ def test_bad_variable_in_query():
with pytest.raises(NonValidVariable) as err:
nosql_apply_parameters_to_query(query, params, handle_errors=True)
assert str(err.value) == 'Non valid variable thing'


# fetch tests

# JSON payload served by the success handler and expected back from `fetch`.
FAKE_DATA = {
    'foo': 'bar',
    'baz': 'fudge',
}


async def send_200_success(req: web.Request):
    """Handle a request by replying with FAKE_DATA as JSON and a 200 status."""
    success_response = web.json_response(FAKE_DATA, status=200)
    return success_response


async def send_401_error(req: web.Request) -> web.Response:
    """Handle a request by replying with a 401 response whose reason is 'Unauthorized'.

    The handler builds and returns a ``web.Response`` object (not a dict), which
    is what the return annotation states.
    """
    return web.Response(reason='Unauthorized', status=401)


async def test_fetch_happy(aiohttp_client, loop):
    """It should return the JSON payload served by the endpoint as a dictionary."""
    app = web.Application(loop=loop)
    app.router.add_get('/foo', send_200_success)
    client = await aiohttp_client(app)

    payload = await fetch('/foo', client)

    assert payload == FAKE_DATA


async def test_fetch_bad_response(aiohttp_client, loop):
    """It should raise an Exception carrying the status and reason on an error response."""
    app = web.Application(loop=loop)
    app.router.add_get('/hotels', send_401_error)
    client = await aiohttp_client(app)

    with pytest.raises(Exception) as raised:
        await fetch('/hotels', client)

    expected_message = 'Aborting request due to error from the API: 401, Unauthorized'
    assert str(raised.value) == expected_message
7 changes: 7 additions & 0 deletions toucan_connectors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@
'label': 'Google Sheets',
'logo': 'google_sheets/google-sheets.png',
},
'GoogleSheets2': {
'connector': 'google_sheets_2.google_sheets_2_connector.GoogleSheets2Connector',
'label': 'Google Sheets Modified',
'logo': 'google_sheets/google-sheets.png',
},
'GoogleSpreadsheet': {
'connector': 'google_spreadsheet.google_spreadsheet_connector.GoogleSpreadsheetConnector',
'label': 'Google Spreadsheet',
Expand Down Expand Up @@ -188,6 +193,8 @@ def html_base64_image_src(image_path: str) -> str:
connector_infos['connector'] = connector_cls
with suppress(AttributeError):
connector_infos['bearer_integration'] = connector_cls.bearer_integration
with suppress(AttributeError):
connector_infos['auth_flow'] = connector_cls.auth_flow
# check if connector implements `get_status`,
# which is hence different from `ToucanConnector.get_status`
connector_infos['hasStatusCheck'] = (
Expand Down
11 changes: 11 additions & 0 deletions toucan_connectors/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from copy import deepcopy

import pyjq
from aiohttp import ClientSession
from jinja2 import Environment, StrictUndefined, Template, meta
from pydantic import Field
from toucan_data_sdk.utils.helpers import slugify
Expand Down Expand Up @@ -204,3 +205,13 @@ def get_loop():
asyncio.set_event_loop(loop)

return loop


async def fetch(url: str, session: ClientSession):
    """GET `url` through `session` and return the decoded JSON body.

    Raises:
        Exception: if the response status is anything other than 200; the
            message carries the status code and reason of the response.
    """
    async with session.get(url) as response:
        if response.status == 200:
            return await response.json()
        raise Exception(
            f'Aborting request due to error from the API: {response.status}, {response.reason}'
        )
Empty file.
Loading