Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ingest): powerbi # scan all accessible workspaces #6441

Merged
merged 16 commits into from
Nov 28, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions metadata-ingestion/docs/sources/powerbi/powerbi_recipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ source:
client_secret: bar
# Enable / Disable ingestion of user information for dashboards
extract_ownership: true
# Enable / Disable ingestion of all accessible workspaces
scan_all_workspaces: false
# If scanning of all workspaces is enabled, list of workspace IDs to skip in ingestion
# scan_exclusion_list:
# - id
# dataset_type_mapping is a fixed mapping of Power BI data source types to the equivalent DataHub "data platform" dataset
dataset_type_mapping:
PostgreSql: postgres
Expand Down
90 changes: 68 additions & 22 deletions metadata-ingestion/src/datahub/ingestion/source/powerbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ class PowerBiAPIConfig(EnvBasedSourceConfigBase):
extract_ownership: bool = pydantic.Field(
default=True, description="Whether ownership should be ingested"
)
scan_all_workspaces: bool = pydantic.Field(
default=False, description="Scan all workspaces the role has access to"
)
scan_exclusion_list: List[str] = pydantic.Field(
looppi marked this conversation as resolved.
Show resolved Hide resolved
default=[], description="List of workspace IDs which are excluded from scan"
)
# Enable/Disable extracting report information
extract_reports: bool = pydantic.Field(
default=True, description="Whether reports should be ingested"
Expand Down Expand Up @@ -576,6 +582,33 @@ def get_dataset(self, workspace_id: str, dataset_id: str) -> Any:
datasource=None,
)

def get_groups(self):
    """
    Request the groups listing from PowerBi and return the decoded JSON body.

    The request is a GET against PowerBiAPI.BASE_URL, authorised with the
    token returned by get_access_token(). raise_for_status() is called on the
    response, so an error HTTP status propagates to the caller as an exception.
    """
    # The endpoint is the API base URL used as-is
    groups_endpoint = PowerBiAPI.BASE_URL
    LOGGER.info(f"Request to get groups endpoint URL={groups_endpoint}")

    # Hit PowerBi
    auth_headers = {Constant.Authorization: self.get_access_token()}
    response = requests.get(groups_endpoint, headers=auth_headers)
    # Fail loudly on an error status rather than parsing an error payload
    response.raise_for_status()
    return response.json()

def get_workspaces(self):
    """
    Build a PowerBiAPI.Workspace for every group whose type is "Workspace".

    Only id and name are copied from the get_groups() response; state,
    datasets and dashboards are initialised empty. Entries are read from the
    "value" key of the response, which defaults to an empty list when absent.
    """
    groups_response = self.get_groups()

    workspaces = []
    for group in groups_response.get("value", []):
        # Skip entries whose type is anything other than "Workspace"
        if group.get("type", None) != "Workspace":
            continue
        workspaces.append(
            PowerBiAPI.Workspace(
                id=group.get("id"),
                name=group.get("name"),
                state="",
                datasets={},
                dashboards=[],
            )
        )
    return workspaces

def get_data_source(self, dataset: Dataset) -> Any:
"""
Fetch the data source from PowerBi for the given dataset
Expand Down Expand Up @@ -1698,35 +1731,48 @@ def create(cls, config_dict, ctx):
config = PowerBiDashboardSourceConfig.parse_obj(config_dict)
return cls(config, ctx)

def get_workspace_ids(self) -> Iterable[str]:
    """
    Return the IDs of the workspaces that should be ingested.

    When source_config.scan_all_workspaces is disabled, only the configured
    source_config.workspace_id is returned. Otherwise every workspace returned
    by powerbi_client.get_workspaces() is included, except those whose ID is
    listed in source_config.scan_exclusion_list.

    A list is returned in both cases so callers always get the same container
    type.
    """
    config = self.source_config
    if not config.scan_all_workspaces:
        return [config.workspace_id]

    # Build the exclusion set once so each membership test is O(1)
    excluded_ids = set(config.scan_exclusion_list)
    return [
        workspace.id
        for workspace in self.powerbi_client.get_workspaces()
        if workspace.id not in excluded_ids
    ]

def get_workunits(self) -> Iterable[MetadataWorkUnit]:
"""
Datahub Ingestion framework invoke this method
"""
LOGGER.info("PowerBi plugin execution is started")

# Fetch PowerBi workspace for given workspace identifier
workspace = self.powerbi_client.get_workspace(self.source_config.workspace_id)

for dashboard in workspace.dashboards:

try:
# Fetch PowerBi users for dashboards
dashboard.users = self.powerbi_client.get_dashboard_users(dashboard)
# Increase dashboard and tiles count in report
self.reporter.report_dashboards_scanned()
self.reporter.report_charts_scanned(count=len(dashboard.tiles))
except Exception as e:
message = f"Error ({e}) occurred while loading dashboard {dashboard.displayName}(id={dashboard.id}) tiles."

LOGGER.exception(message, e)
self.reporter.report_warning(dashboard.id, message)
# Convert PowerBi Dashboard and child entities to Datahub work unit to ingest into Datahub
workunits = self.mapper.to_datahub_work_units(dashboard)
for workunit in workunits:
# Add workunit to report
self.reporter.report_workunit(workunit)
# Return workunit to Datahub Ingestion framework
yield workunit
for workspace_id in self.get_workspace_ids():
LOGGER.info(f"Scanning {workspace_id=}")
workspace = self.powerbi_client.get_workspace(workspace_id)

for dashboard in workspace.dashboards:

try:
# Fetch PowerBi users for dashboards
dashboard.users = self.powerbi_client.get_dashboard_users(dashboard)
# Increase dashboard and tiles count in report
self.reporter.report_dashboards_scanned()
self.reporter.report_charts_scanned(count=len(dashboard.tiles))
except Exception as e:
message = f"Error ({e}) occurred while loading dashboard {dashboard.displayName}(id={dashboard.id}) tiles."

LOGGER.exception(message, e)
self.reporter.report_warning(dashboard.id, message)
# Convert PowerBi Dashboard and child entities to Datahub work unit to ingest into Datahub
workunits = self.mapper.to_datahub_work_units(dashboard)
for workunit in workunits:
# Add workunit to report
self.reporter.report_workunit(workunit)
# Return workunit to Datahub Ingestion framework
yield workunit

if self.source_config.extract_reports:
for report in self.powerbi_client.get_reports(workspace=workspace):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
[
{
"auditHeader": null,
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)",
"entityKeyAspect": null,
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
"value": "{\"customProperties\": {}, \"description\": \"issue_history\", \"tags\": []}",
"contentType": "application/json"
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test",
"registryName": null,
"registryVersion": null,
"properties": null
}
},
{
"auditHeader": null,
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)",
"entityKeyAspect": null,
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"value": "{\"removed\": false}",
"contentType": "application/json"
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test",
"registryName": null,
"registryVersion": null,
"properties": null
}
},
{
"auditHeader": null,
"entityType": "chart",
"entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
"entityKeyAspect": null,
"changeType": "UPSERT",
"aspectName": "chartInfo",
"aspect": {
"value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)\"}]}",
"contentType": "application/json"
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test",
"registryName": null,
"registryVersion": null,
"properties": null
}
},
{
"auditHeader": null,
"entityType": "chart",
"entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
"entityKeyAspect": null,
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"value": "{\"removed\": false}",
"contentType": "application/json"
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test",
"registryName": null,
"registryVersion": null,
"properties": null
}
},
{
"auditHeader": null,
"entityType": "chart",
"entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)",
"entityKeyAspect": null,
"changeType": "UPSERT",
"aspectName": "chartKey",
"aspect": {
"value": "{\"dashboardTool\": \"powerbi\", \"chartId\": \"powerbi.linkedin.com/charts/B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0\"}",
"contentType": "application/json"
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test",
"registryName": null,
"registryVersion": null,
"properties": null
}
},
{
"auditHeader": null,
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
"entityKeyAspect": null,
"changeType": "UPSERT",
"aspectName": "browsePaths",
"aspect": {
"value": "{\"paths\": [\"/powerbi/demo-workspace\"]}",
"contentType": "application/json"
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test",
"registryName": null,
"registryVersion": null,
"properties": null
}
},
{
"auditHeader": null,
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
"entityKeyAspect": null,
"changeType": "UPSERT",
"aspectName": "dashboardInfo",
"aspect": {
"value": "{\"customProperties\": {\"chartCount\": \"1\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}",
"contentType": "application/json"
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test",
"registryName": null,
"registryVersion": null,
"properties": null
}
},
{
"auditHeader": null,
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
"entityKeyAspect": null,
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"value": "{\"removed\": false}",
"contentType": "application/json"
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test",
"registryName": null,
"registryVersion": null,
"properties": null
}
},
{
"auditHeader": null,
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)",
"entityKeyAspect": null,
"changeType": "UPSERT",
"aspectName": "dashboardKey",
"aspect": {
"value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}",
"contentType": "application/json"
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test",
"registryName": null,
"registryVersion": null,
"properties": null
}
},
{
"aspect": {
"contentType": "application/json",
"value": "{\"paths\": [\"/powerbi/second-demo-workspace\"]}"
},
"aspectName": "browsePaths",
"changeType": "UPSERT",
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)",
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test"
}
},
{
"aspect": {
"contentType": "application/json",
"value": "{\"customProperties\": {\"chartCount\": \"0\", \"workspaceName\": \"second-demo-workspace\", \"workspaceId\": \"7D668CAD-8FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard2\", \"description\": \"test_dashboard2\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}"
},
"aspectName": "dashboardInfo",
"changeType": "UPSERT",
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)",
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test"
}
},
{
"aspect": {
"contentType": "application/json",
"value": "{\"removed\": false}"
},
"aspectName": "status",
"changeType": "UPSERT",
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)",
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test"
}
},
{
"aspect": {
"contentType": "application/json",
"value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE\"}"
},
"aspectName": "dashboardKey",
"changeType": "UPSERT",
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)",
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test"
}
}
]
Loading