Skip to content
This repository has been archived by the owner on Oct 14, 2020. It is now read-only.

Commit

Permalink
Merge pull request getmoto#1750 from TheDooner64/glue-data-catalog
Browse files Browse the repository at this point in the history
Scaffolding for AWS Glue Data Catalog
  • Loading branch information
spulec authored Aug 8, 2018
2 parents ba9e795 + 3830757 commit 42d486f
Show file tree
Hide file tree
Showing 15 changed files with 356 additions and 2 deletions.
1 change: 1 addition & 0 deletions moto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from .emr import mock_emr, mock_emr_deprecated # flake8: noqa
from .events import mock_events # flake8: noqa
from .glacier import mock_glacier, mock_glacier_deprecated # flake8: noqa
from .glue import mock_glue # flake8: noqa
from .iam import mock_iam, mock_iam_deprecated # flake8: noqa
from .kinesis import mock_kinesis, mock_kinesis_deprecated # flake8: noqa
from .kms import mock_kms, mock_kms_deprecated # flake8: noqa
Expand Down
2 changes: 2 additions & 0 deletions moto/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from moto.emr import emr_backends
from moto.events import events_backends
from moto.glacier import glacier_backends
from moto.glue import glue_backends
from moto.iam import iam_backends
from moto.instance_metadata import instance_metadata_backends
from moto.kinesis import kinesis_backends
Expand Down Expand Up @@ -65,6 +66,7 @@
'events': events_backends,
'emr': emr_backends,
'glacier': glacier_backends,
'glue': glue_backends,
'iam': iam_backends,
'moto_api': moto_api_backends,
'instance_metadata': instance_metadata_backends,
Expand Down
5 changes: 5 additions & 0 deletions moto/glue/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from __future__ import unicode_literals
from .models import glue_backend

# Registry of Glue backends: one shared instance stored under the "global" key.
glue_backends = {"global": glue_backend}
# Decorator form of the backend (imported by moto/__init__.py as ``mock_glue``).
mock_glue = glue_backend.decorator
24 changes: 24 additions & 0 deletions moto/glue/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from __future__ import unicode_literals
from moto.core.exceptions import JsonRESTError


class GlueClientError(JsonRESTError):
    """Common base for Glue client errors; carries HTTP status code 400."""

    code = 400


class DatabaseAlreadyExistsException(GlueClientError):
    """Error reported when a database name is already in use."""

    def __init__(self):
        error_type = 'DatabaseAlreadyExistsException'
        message = 'Database already exists.'
        self.code = 400
        super(DatabaseAlreadyExistsException, self).__init__(error_type, message)


class TableAlreadyExistsException(GlueClientError):
    """Error reported when a table name is already in use within a database."""

    def __init__(self):
        error_type = 'TableAlreadyExistsException'
        message = 'Table already exists.'
        self.code = 400
        super(TableAlreadyExistsException, self).__init__(error_type, message)
60 changes: 60 additions & 0 deletions moto/glue/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from __future__ import unicode_literals

from moto.core import BaseBackend, BaseModel
from moto.compat import OrderedDict
from.exceptions import DatabaseAlreadyExistsException, TableAlreadyExistsException


class GlueBackend(BaseBackend):
    """In-memory store behind the mocked Glue Data Catalog.

    Databases are held in an ordered mapping keyed by name; every database
    owns its own ordered name -> table mapping.
    """

    def __init__(self):
        self.databases = OrderedDict()

    def create_database(self, database_name):
        """Register a new database and return it.

        Raises DatabaseAlreadyExistsException when the name is already taken.
        """
        if database_name in self.databases:
            raise DatabaseAlreadyExistsException()

        new_database = FakeDatabase(database_name)
        self.databases[database_name] = new_database
        return new_database

    def get_database(self, database_name):
        """Return the database stored under ``database_name``.

        NOTE(review): an unknown name propagates the mapping's KeyError.
        """
        return self.databases[database_name]

    def create_table(self, database_name, table_name, table_input):
        """Add a table to an existing database and return it.

        Raises TableAlreadyExistsException when the database already holds a
        table with that name.
        """
        tables = self.get_database(database_name).tables

        if table_name in tables:
            raise TableAlreadyExistsException()

        new_table = FakeTable(database_name, table_name, table_input)
        tables[table_name] = new_table
        return new_table

    def get_table(self, database_name, table_name):
        """Return one table of a database (KeyError if either name is unknown)."""
        return self.get_database(database_name).tables[table_name]

    def get_tables(self, database_name):
        """Return all tables of a database as a list, in creation order."""
        return list(self.get_database(database_name).tables.values())


class FakeDatabase(BaseModel):
    """A named database holding an ordered mapping of its tables."""

    def __init__(self, database_name):
        self.name = database_name
        # Maps table name -> FakeTable; filled in by GlueBackend.create_table.
        self.tables = OrderedDict()


class FakeTable(BaseModel):
    """A table record built from the ``TableInput`` of a create-table call."""

    def __init__(self, database_name, table_name, table_input):
        self.name = table_name
        self.database_name = database_name
        # The raw input is kept as-is; the two fields below are read out of
        # it, falling back to empty values when the keys are absent.
        self.table_input = table_input
        self.partition_keys = table_input.get('PartitionKeys', [])
        self.storage_descriptor = table_input.get('StorageDescriptor', {})


# Module-level backend instance imported by the package __init__ and responses.
glue_backend = GlueBackend()
63 changes: 63 additions & 0 deletions moto/glue/responses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from __future__ import unicode_literals

import json

from moto.core.responses import BaseResponse
from .models import glue_backend


class GlueResponse(BaseResponse):
    """Handlers that translate Glue requests into backend calls.

    Each handler reads its arguments from the JSON request body and returns
    the response body as a string.
    """

    @property
    def glue_backend(self):
        return glue_backend

    @property
    def parameters(self):
        # Parsed from the raw body on every access (no caching).
        return json.loads(self.body)

    def create_database(self):
        """Create the database named in ``DatabaseInput``; empty response body."""
        params = self.parameters
        self.glue_backend.create_database(params['DatabaseInput']['Name'])
        return ""

    def get_database(self):
        """Return the requested database serialized as JSON."""
        name = self.parameters.get('Name')
        found = self.glue_backend.get_database(name)
        payload = {'Database': {'Name': found.name}}
        return json.dumps(payload)

    def create_table(self):
        """Create a table from ``TableInput``; empty response body."""
        params = self.parameters
        database_name = params.get('DatabaseName')
        table_input = params.get('TableInput')
        self.glue_backend.create_table(
            database_name,
            table_input.get('Name'),
            table_input,
        )
        return ""

    def get_table(self):
        """Return a single table serialized as JSON."""
        params = self.parameters
        found = self.glue_backend.get_table(
            params.get('DatabaseName'),
            params.get('Name'),
        )
        description = {
            'DatabaseName': found.database_name,
            'Name': found.name,
            'PartitionKeys': found.partition_keys,
            'StorageDescriptor': found.storage_descriptor,
        }
        return json.dumps({'Table': description})

    def get_tables(self):
        """Return every table of a database serialized as JSON."""
        database_name = self.parameters.get('DatabaseName')
        descriptions = []
        for found in self.glue_backend.get_tables(database_name):
            descriptions.append({
                'DatabaseName': found.database_name,
                'Name': found.name,
                'PartitionKeys': found.partition_keys,
                'StorageDescriptor': found.storage_descriptor,
            })
        return json.dumps({'TableList': descriptions})
11 changes: 11 additions & 0 deletions moto/glue/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from __future__ import unicode_literals

from .responses import GlueResponse

# Host patterns handled by the Glue mock.
# NOTE(review): presumably treated as regular expressions by moto core; if so,
# the unescaped dots match any character, not just a literal "." - confirm
# whether that looseness is intended.
url_bases = [
    "https?://glue(.*).amazonaws.com"
]

# Path templates ("{0}" is a placeholder in the key) mapped to their handler;
# every request to the root path goes through GlueResponse.dispatch.
url_paths = {
    '{0}/$': GlueResponse.dispatch
}
1 change: 1 addition & 0 deletions moto/glue/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from __future__ import unicode_literals
1 change: 1 addition & 0 deletions tests/test_glue/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from __future__ import unicode_literals
1 change: 1 addition & 0 deletions tests/test_glue/fixtures/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from __future__ import unicode_literals
31 changes: 31 additions & 0 deletions tests/test_glue/fixtures/datacatalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from __future__ import unicode_literals

# Baseline ``TableInput`` payload used by the Glue tests. It carries no
# 'Name', 'PartitionKeys', or StorageDescriptor 'Columns'/'Location' entries;
# helpers.create_table_input fills those in on a deep copy.
TABLE_INPUT = {
    'Owner': 'a_fake_owner',
    'Parameters': {
        'EXTERNAL': 'TRUE',
    },
    'Retention': 0,
    'StorageDescriptor': {
        'BucketColumns': [],
        'Compressed': False,
        'InputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat',
        'NumberOfBuckets': -1,
        'OutputFormat': 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat',
        'Parameters': {},
        'SerdeInfo': {
            'Parameters': {
                'serialization.format': '1'
            },
            'SerializationLibrary': 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
        },
        'SkewedInfo': {
            'SkewedColumnNames': [],
            'SkewedColumnValueLocationMaps': {},
            'SkewedColumnValues': []
        },
        'SortColumns': [],
        'StoredAsSubDirectories': False
    },
    'TableType': 'EXTERNAL_TABLE',
}
46 changes: 46 additions & 0 deletions tests/test_glue/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import unicode_literals

import copy

from .fixtures.datacatalog import TABLE_INPUT


def create_database(client, database_name):
    """Ask ``client`` to create a database called ``database_name``."""
    database_input = {'Name': database_name}
    return client.create_database(DatabaseInput=database_input)


def get_database(client, database_name):
    """Fetch the database called ``database_name`` through ``client``."""
    request = {'Name': database_name}
    return client.get_database(**request)


def create_table_input(table_name, s3_location, columns=None, partition_keys=None):
    """Build a ``TableInput`` dict from a deep copy of ``TABLE_INPUT``.

    :param table_name: stored under ``'Name'``.
    :param s3_location: stored under ``StorageDescriptor['Location']``.
    :param columns: stored under ``StorageDescriptor['Columns']``; a new
        empty list is used when omitted.
    :param partition_keys: stored under ``'PartitionKeys'``; a new empty
        list is used when omitted.
    :returns: the populated dict.
    """
    # The defaults are None rather than [] because the lists are embedded in
    # the returned dict: a shared default list mutated through one result
    # would otherwise show up in every later call.
    if columns is None:
        columns = []
    if partition_keys is None:
        partition_keys = []

    table_input = copy.deepcopy(TABLE_INPUT)
    table_input['Name'] = table_name
    table_input['PartitionKeys'] = partition_keys
    table_input['StorageDescriptor']['Columns'] = columns
    table_input['StorageDescriptor']['Location'] = s3_location
    return table_input


def create_table(client, database_name, table_name, table_input):
    """Create a table in ``database_name`` from ``table_input``.

    ``table_name`` is accepted but never forwarded to the client; only
    ``database_name`` and ``table_input`` are sent.
    """
    request = {
        'DatabaseName': database_name,
        'TableInput': table_input,
    }
    return client.create_table(**request)


def get_table(client, database_name, table_name):
    """Fetch table ``table_name`` of ``database_name`` through ``client``."""
    request = {
        'DatabaseName': database_name,
        'Name': table_name,
    }
    return client.get_table(**request)


def get_tables(client, database_name):
    """List the tables of ``database_name`` through ``client``."""
    request = {'DatabaseName': database_name}
    return client.get_tables(**request)
108 changes: 108 additions & 0 deletions tests/test_glue/test_datacatalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from __future__ import unicode_literals

import sure # noqa
from nose.tools import assert_raises
import boto3
from botocore.client import ClientError

from moto import mock_glue
from . import helpers


@mock_glue
def test_create_database():
    """A created database can be fetched back by its name."""
    glue = boto3.client('glue', region_name='us-east-1')
    name = 'myspecialdatabase'

    helpers.create_database(glue, name)
    fetched = helpers.get_database(glue, name)['Database']

    fetched.should.equal({'Name': name})


@mock_glue
def test_create_database_already_exists():
    """Creating the same database twice is rejected with the Glue error code."""
    glue = boto3.client('glue', region_name='us-east-1')
    name = 'cantcreatethisdatabasetwice'
    helpers.create_database(glue, name)

    with assert_raises(ClientError) as exc:
        helpers.create_database(glue, name)

    error_code = exc.exception.response['Error']['Code']
    error_code.should.equal('DatabaseAlreadyExistsException')


@mock_glue
def test_create_table():
    """A created table is returned with the fields supplied at creation."""
    glue = boto3.client('glue', region_name='us-east-1')
    db_name = 'myspecialdatabase'
    helpers.create_database(glue, db_name)

    tbl_name = 'myspecialtable'
    location = 's3://my-bucket/{database_name}/{table_name}'.format(
        database_name=db_name,
        table_name=tbl_name
    )
    expected = helpers.create_table_input(tbl_name, location)
    helpers.create_table(glue, db_name, tbl_name, expected)

    actual = helpers.get_table(glue, db_name, tbl_name)['Table']

    # Compare field by field, in the same order as before.
    for field in ('Name', 'StorageDescriptor', 'PartitionKeys'):
        actual[field].should.equal(expected[field])


@mock_glue
def test_create_table_already_exists():
    """Creating the same table twice is rejected with the Glue error code."""
    glue = boto3.client('glue', region_name='us-east-1')
    db_name = 'myspecialdatabase'
    helpers.create_database(glue, db_name)

    tbl_name = 'cantcreatethistabletwice'
    location = 's3://my-bucket/{database_name}/{table_name}'.format(
        database_name=db_name,
        table_name=tbl_name
    )
    payload = helpers.create_table_input(tbl_name, location)
    helpers.create_table(glue, db_name, tbl_name, payload)

    with assert_raises(ClientError) as exc:
        helpers.create_table(glue, db_name, tbl_name, payload)

    error_code = exc.exception.response['Error']['Code']
    error_code.should.equal('TableAlreadyExistsException')


@mock_glue
def test_get_tables():
    """Listing a database returns every table created in it."""
    glue = boto3.client('glue', region_name='us-east-1')
    db_name = 'myspecialdatabase'
    helpers.create_database(glue, db_name)

    # Remember what was sent for each table so the listing can be checked.
    expected_by_name = {}
    for name in ['myfirsttable', 'mysecondtable', 'mythirdtable']:
        location = 's3://my-bucket/{database_name}/{table_name}'.format(
            database_name=db_name,
            table_name=name
        )
        payload = helpers.create_table_input(name, location)
        expected_by_name[name] = payload
        helpers.create_table(glue, db_name, name, payload)

    listed = helpers.get_tables(glue, db_name)['TableList']

    assert len(listed) == 3

    for entry in listed:
        listed_name = entry['Name']
        expected = expected_by_name[listed_name]
        listed_name.should.equal(expected['Name'])
        entry['StorageDescriptor'].should.equal(expected['StorageDescriptor'])
        entry['PartitionKeys'].should.equal(expected['PartitionKeys'])
2 changes: 1 addition & 1 deletion tests/test_s3/test_s3_storageclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,6 @@ def test_s3_default_storage_class():

# tests that the default storage class is still STANDARD
list_of_objects["Contents"][0]["StorageClass"].should.equal("STANDARD")



Loading

0 comments on commit 42d486f

Please sign in to comment.