Skip to content

Commit

Permalink
feat: Add an extractor for pulling user information from BambooHR (#369)
Browse files Browse the repository at this point in the history
Signed-off-by: Nathan Lawrence <[email protected]>
  • Loading branch information
nathanlawrence-asana authored Sep 23, 2020
1 parent 88c0552 commit 6802ab1
Show file tree
Hide file tree
Showing 9 changed files with 182 additions and 0 deletions.
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,29 @@ job = DefaultJob(conf=job_config,
job.launch()
```

### [BamboohrUserExtractor](./databuilder/extractor/user/bamboohr/bamboohr_user_extractor.py)

The included `BamboohrUserExtractor` provides support for extracting basic user metadata from [BambooHR](https://www.bamboohr.com/). For companies and organizations that use BambooHR to store employee information such as email addresses, first names, last names, titles, and departments, use the `BamboohrUserExtractor` to populate Amundsen user data.

A sample job config is shown below.

```python
# `api_key` and `subdomain` are placeholders the caller must define: the
# BambooHR API key and the account subdomain used in the API URL.
extractor = BamboohrUserExtractor()
task = DefaultTask(extractor=extractor, loader=FsNeo4jCSVLoader())

# The extractor reads its settings from the 'extractor.bamboohr_user' scope.
job_config = ConfigFactory.from_dict({
'extractor.bamboohr_user.api_key': api_key,
'extractor.bamboohr_user.subdomain': subdomain,
})

job = DefaultJob(conf=job_config,
task=task,
publisher=Neo4jCsvPublisher())
job.launch()
```



## List of transformers
#### [ChainedTransformer](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/transformer/base_transformer.py#L41 "ChainedTransformer")
A chained transformer that can take multiple transformers.
Expand Down
2 changes: 2 additions & 0 deletions databuilder/extractor/user/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
2 changes: 2 additions & 0 deletions databuilder/extractor/user/bamboohr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
65 changes: 65 additions & 0 deletions databuilder/extractor/user/bamboohr/bamboohr_user_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0


from pyhocon import ConfigTree
import requests
from requests.auth import HTTPBasicAuth
from typing import Iterator, Optional
from xml.etree import ElementTree

from databuilder.extractor.base_extractor import Extractor
from databuilder.models.user import User


class BamboohrUserExtractor(Extractor):
    """Extract user records from the BambooHR employee directory.

    The extractor issues a single GET request against the BambooHR
    ``employees/directory`` endpoint, parses the XML response and yields one
    ``User`` per ``<employee>`` element.

    Configuration keys (read from the scope returned by ``get_scope``):

    * ``api_key``   -- BambooHR API key, sent as the HTTP basic-auth username.
    * ``subdomain`` -- BambooHR account subdomain, interpolated into the URL.
    """

    API_KEY = 'api_key'
    SUBDOMAIN = 'subdomain'

    def init(self, conf: ConfigTree) -> None:
        """Read the API key and subdomain from ``conf`` and reset iteration state."""
        # Created lazily on the first call to extract().
        self._extract_iter: Optional[Iterator] = None

        self._api_key = conf.get_string(BamboohrUserExtractor.API_KEY)
        self._subdomain = conf.get_string(BamboohrUserExtractor.SUBDOMAIN)

    def extract(self) -> Optional[User]:
        """Return the next ``User``, or ``None`` once the directory is exhausted."""
        if self._extract_iter is None:
            self._extract_iter = self._get_extract_iter()
        try:
            return next(self._extract_iter)
        except StopIteration:
            return None

    def _employee_directory_uri(self) -> str:
        """Build the employee directory URL for the configured subdomain."""
        return 'https://api.bamboohr.com/api/gateway.php/{subdomain}/v1/employees/directory'.format(
            subdomain=self._subdomain
        )

    @staticmethod
    def _get_field(employee: ElementTree.Element, name: str) -> str:
        """Return the text of the ``<field id=name>`` child, or ``''`` if absent or empty."""
        field = employee.find("./field[@id='{name}']".format(name=name))
        if field is not None and field.text is not None:
            return field.text
        return ''

    def _get_extract_iter(self) -> Iterator[User]:
        """Fetch the employee directory and yield one ``User`` per employee.

        Because this is a generator, the HTTP request is only sent when the
        first item is requested.

        Raises:
            requests.HTTPError: if BambooHR answers with a 4xx/5xx status
                (for example an invalid API key or unknown subdomain).
            xml.etree.ElementTree.ParseError: if the response body is not
                well-formed XML.
        """
        # The API key is passed as the basic-auth username; the password is
        # the literal string 'x'.
        response = requests.get(
            self._employee_directory_uri(), auth=HTTPBasicAuth(self._api_key, 'x')
        )
        # Fail loudly on HTTP errors instead of trying to parse an error page
        # as XML or silently producing an empty user list.
        response.raise_for_status()

        root = ElementTree.fromstring(response.content)

        for employee in root.findall('./employees/employee'):
            yield User(
                email=self._get_field(employee, 'workEmail'),
                first_name=self._get_field(employee, 'firstName'),
                last_name=self._get_field(employee, 'lastName'),
                name=self._get_field(employee, 'displayName'),
                team_name=self._get_field(employee, 'department'),
                role_name=self._get_field(employee, 'jobTitle'),
            )

    def get_scope(self) -> str:
        """Return the configuration scope under which this extractor's keys live."""
        return 'extractor.bamboohr_user'
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,4 @@ httplib2>=0.18.0
unidecode

requests==2.23.0,<3.0
responses==0.10.6
2 changes: 2 additions & 0 deletions tests/unit/extractor/user/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
2 changes: 2 additions & 0 deletions tests/unit/extractor/user/bamboohr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
46 changes: 46 additions & 0 deletions tests/unit/extractor/user/bamboohr/test_bamboohr_user_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import io
import unittest

import os

import responses
from pyhocon import ConfigFactory

from databuilder.models.user import User
from databuilder.extractor.user.bamboohr.bamboohr_user_extractor import BamboohrUserExtractor


class TestBamboohrUserExtractor(unittest.TestCase):
    """Checks that the BambooHR extractor parses the bundled XML fixture."""

    @responses.activate
    def test_parse_testdata(self) -> None:
        """A directory with one employee must produce exactly the matching User."""
        extractor = BamboohrUserExtractor()
        config = ConfigFactory.from_dict({'api_key': 'api_key', 'subdomain': 'amundsen'})
        extractor.init(config)

        # Locate the fixture relative to this test module.
        this_dir = os.path.dirname(os.path.realpath(__file__))
        fixture_path = os.path.join(
            this_dir,
            '../../../resources/extractor/user/bamboohr/testdata.xml'
        )

        # Serve the fixture as the mocked response for the directory endpoint.
        with io.open(fixture_path) as fixture:
            responses.add(
                responses.GET,
                extractor._employee_directory_uri(),
                body=fixture.read(),
            )

        extracted = list(extractor._get_extract_iter())

        expected_user = User(
            email='[email protected]',
            first_name='Roald',
            last_name='Amundsen',
            name='Roald Amundsen',
            team_name='508 Corporate Marketing',
            role_name='Antarctic Explorer',
        )

        self.assertEqual(1, len(extracted))
        # Users are compared through their repr.
        self.assertEqual(repr(expected_user), repr(extracted[0]))


# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
39 changes: 39 additions & 0 deletions tests/unit/resources/extractor/user/bamboohr/testdata.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<?xml version="1.0"?>
<directory>
<fieldset>
<field id="displayName">Display name</field>
<field id="firstName">First name</field>
<field id="lastName">Last name</field>
<field id="preferredName">Preferred name</field>
<field id="gender">Gender</field>
<field id="jobTitle">Job title</field>
<field id="workPhone">Work Phone</field>
<field id="mobilePhone">Mobile Phone</field>
<field id="workEmail">Work Email</field>
<field id="department">Department</field>
<field id="location">Location</field>
<field id="workPhoneExtension">Work Ext.</field>
<field id="photoUploaded">Employee photo</field>
<field id="photoUrl">Photo URL</field>
<field id="canUploadPhoto">Can Upload Photo</field>
</fieldset>
<employees>
<employee id="1082">
<field id="displayName">Roald Amundsen</field>
<field id="firstName">Roald</field>
<field id="lastName">Amundsen</field>
<field id="preferredName"></field>
<field id="gender">Male</field>
<field id="jobTitle">Antarctic Explorer</field>
<field id="workPhone"></field>
<field id="mobilePhone"></field>
<field id="workEmail">[email protected]</field>
<field id="department">508 Corporate Marketing</field>
<field id="location">Norway</field>
<field id="workPhoneExtension"></field>
<field id="photoUploaded">true</field>
<field id="photoUrl">https://upload.wikimedia.org/wikipedia/commons/thumb/6/6f/Amundsen_in_fur_skins.jpg/440px-Amundsen_in_fur_skins.jpg</field>
<field id="canUploadPhoto">no</field>
</employee>
</employees>
</directory>

0 comments on commit 6802ab1

Please sign in to comment.