Skip to content

Commit

Permalink
Support adding an info value for expert and owner contacts. Achieved …
Browse files Browse the repository at this point in the history
…by adding a `:`

delimiter between email and info. This is similar to how Microsoft Purview supports
info in the import of terms.

Addresses #257
  • Loading branch information
wjohnson committed Dec 23, 2023
1 parent 0df12bf commit 6a4a89b
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 8 deletions.
11 changes: 11 additions & 0 deletions pyapacheatlas/readers/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,17 @@ def parse_bulk_entities(self, filepath, contacts_func=None):
"""
Generate a set of entities from an excel template file.
Special Headers
* `[root] someProperty` adds the cell's value to the root of the entity in the `someProperty` property.
* `[Relationship] meanings` add glossary terms
* `experts` and `owners` add expert or owner contact.
* For Microsoft Purview, supports email or email:info when used with `contacts_func`
* For Microsoft Purview, supports `objectId` or `objectId:info` without a `contacts_func`
* `:` is a hard coded delimiter between the email or object id and the info value
* `info` is the string of info text for Microsoft Purview
* Also supports multiple entries using the value separator (defaults to `;`) e.g. `email1;email2:info2;email3:info3`
:param str filepath:
The xlsx file that contains your table and columns.
:param function contacts_func:
Expand Down
30 changes: 22 additions & 8 deletions pyapacheatlas/readers/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def _organize_attributes(self, row, existing_entities, ignore=[]):

def _organize_contacts(self, contacts, contacts_func, contacts_cache):
"""
Convert the string with delimiters into a list of `{id: contact}`
Convert the string with delimiters into a list of `{id: contact, info: value}`
after calling the contacts_func on the stripped contact string.
:param str contacts: a splittable string.
Expand All @@ -250,15 +250,29 @@ def _organize_contacts(self, contacts, contacts_func, contacts_cache):
for contact in contacts.split(self.config.value_separator):
if contact == "":
continue
clean_contact = contact.strip()
output = contact.strip()
if clean_contact in contacts_cache:
output = contacts_cache[clean_contact]
# Support providing a email:info;email:info pattern
# This matches what Microsoft Purview does for importing
# terms through the UI and CSV
_contact_and_info = contact.strip().split(":")

_clean_contact = _contact_and_info[0].strip()
_clean_info = None
_contact_obj_id = None
if len(_contact_and_info) > 1:
_clean_info = _contact_and_info[1].strip()

if _clean_contact in contacts_cache:
_contact_obj_id = contacts_cache[_clean_contact]
else:
output = contacts_func(clean_contact)
contacts_cache[clean_contact] = output
_contact_obj_id = contacts_func(_clean_contact)
contacts_cache[_clean_contact] = _contact_obj_id
# This format is specific to Azure Purview
contacts_enhanced.append({"id": output})
output = {
"id": _contact_obj_id
}
if _clean_info:
output["info"] = _clean_info
contacts_enhanced.append(output)

return contacts_enhanced

Expand Down

0 comments on commit 6a4a89b

Please sign in to comment.