From 69f945cefa1a00ac8a1b13064a3ef85c8a180cf1 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Thu, 10 Nov 2022 10:57:46 +0530 Subject: [PATCH 01/53] powerbi package --- .../src/datahub/ingestion/source/{ => powerbi}/powerbi.py | 2 ++ 1 file changed, 2 insertions(+) rename metadata-ingestion/src/datahub/ingestion/source/{ => powerbi}/powerbi.py (99%) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py similarity index 99% rename from metadata-ingestion/src/datahub/ingestion/source/powerbi.py rename to metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 993e74a76f9ab..f709d20637e9e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -900,6 +900,8 @@ def init_dashboard_tiles(workspace: PowerBiAPI.Workspace) -> None: # Scan is complete lets take the result scan_result = get_scan_result(scan_id=scan_id) LOGGER.debug("scan result = {}".format(scan_result)) + import json + print(json.dumps(scan_result, indent=1)) workspace = PowerBiAPI.Workspace( id=scan_result["id"], name=scan_result["name"], From d68230ba911d9dfc2dc636097c380e80bc223f74 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 14 Nov 2022 10:04:51 +0530 Subject: [PATCH 02/53] restructure powerbi --- .../ingestion/source/powerbi/__init__.py | 1 + .../source/powerbi/expression_parser.py | 79 +++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/powerbi/__init__.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/powerbi/expression_parser.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/__init__.py new file mode 100644 index 0000000000000..85296f8b7a31e --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/__init__.py @@ -0,0 +1 @@ +from powerbi import PowerBiDashboardSource diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/expression_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/expression_parser.py new file mode 100644 index 0000000000000..f5d437f6a23d2 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/expression_parser.py @@ -0,0 +1,79 @@ +from abc import ABC, abstractmethod +from typing import Optional, List, Dict + + +class Token(ABC): + @abstractmethod + def parse_raw_token(self) -> str: + pass + + +class BaseToken(Token, ABC): + _raw_token: str + _nested_tokens: Optional[List["BaseToken"]] + + def __init__(self, raw_token: str, nested_tokens: Optional[List["BaseToken"]]): + self._raw_token = raw_token + self._nested_tokens = nested_tokens + self.parse_raw_token(self._raw_token) + + +class LetToken(BaseToken): + def __init__(self, raw_token: str, nested_raw_tokens: Optional[List["Token"]]): + super().__init__(raw_token, nested_raw_tokens) + + def parse_raw_token(self) -> str: + pass + + +class TableFuncToken(BaseToken): + def __init__(self, raw_token: str, nested_raw_tokens: Optional[List["BaseToken"]]): + super().__init__(raw_token, nested_raw_tokens) + + def parse_raw_token(self) -> str: + pass + + +class DataAccessToken(BaseToken): + def __init__(self, raw_token: str, nested_raw_tokens: Optional[List["BaseToken"]]): + super().__init__(raw_token, nested_raw_tokens) + + def parse_raw_token(self) -> str: + pass + + +class 
OracleDataAccessToken(BaseToken): + def __init__(self, raw_token: str, nested_raw_tokens: Optional[List["BaseToken"]]): + super().__init__(raw_token, nested_raw_tokens) + + def parse_raw_token(self) -> str: + pass + + +class Step: + tokens: List[BaseToken] + def __init__(self, tokens: List[BaseToken]): + self.tokens = tokens + + +token_registry: Dict[str, BaseToken] = { + "let": LetToken, + "Table": TableFuncToken, + "PostgreSQL.Database": DataAccessToken, + "DB2.Database": DataAccessToken, + "Sql.Database": DataAccessToken, + "Oracle.Database": OracleDataAccessToken, +} + + +# identifier with space are not supported. +# This is one of the way to create identifier in M https://learn.microsoft.com/en-us/powerquery-m/expression-identifier +def parse_expression(expression: str) -> List[Step]: + strip_expression: str = expression.strip() + raw_token: str = "" + index: int = 0 + for c in strip_expression: + if c == ' ': + continue + + raw_token = raw_token + c From 51d68200c44d9db6d9314f674f93a95ef79832f7 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 16 Nov 2022 13:45:53 +0530 Subject: [PATCH 03/53] lexical rules --- metadata-ingestion/setup.py | 2 +- .../ingestion/source/powerbi/__init__.py | 2 +- .../{expression_parser.py => m_parser.py} | 15 +- .../powerbi/powerbi-lexical-grammar.rule | 551 ++++++++++++++++++ .../integration/powerbi/test_m_parser.py | 5 + 5 files changed, 563 insertions(+), 12 deletions(-) rename metadata-ingestion/src/datahub/ingestion/source/powerbi/{expression_parser.py => m_parser.py} (83%) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule create mode 100644 metadata-ingestion/tests/integration/powerbi/test_m_parser.py diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index c81ac28577ff3..4e61fdf436cbb 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -325,7 +325,7 @@ def get_long_description(): "trino": sql_common | trino, "starburst-trino-usage": sql_common | usage_common | trino, "nifi": {"requests", "packaging"}, - "powerbi": microsoft_common, + "powerbi": microsoft_common | {"lark[regex]==1.1.4"}, "powerbi-report-server": powerbi_report_server, "vertica": sql_common | {"sqlalchemy-vertica[vertica-python]==0.0.5"}, "unity-catalog": databricks_cli | {"requests"}, diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/__init__.py index 85296f8b7a31e..1068f335e8f8e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/__init__.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/__init__.py @@ -1 +1 @@ -from powerbi import PowerBiDashboardSource +from datahub.ingestion.source.powerbi.powerbi import PowerBiDashboardSource diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/expression_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py similarity index 83% rename from metadata-ingestion/src/datahub/ingestion/source/powerbi/expression_parser.py rename to metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index f5d437f6a23d2..7ebfd9a2e3966 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/expression_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -1,5 +1,7 @@ from abc import ABC, abstractmethod from typing import Optional, List, Dict +import importlib.resources as pkg_resource +from lark import Lark class Token(ABC): @@ -66,14 
+68,7 @@ def __init__(self, tokens: List[BaseToken]): } -# identifier with space are not supported. -# This is one of the way to create identifier in M https://learn.microsoft.com/en-us/powerquery-m/expression-identifier def parse_expression(expression: str) -> List[Step]: - strip_expression: str = expression.strip() - raw_token: str = "" - index: int = 0 - for c in strip_expression: - if c == ' ': - continue - - raw_token = raw_token + c + grammar: str = pkg_resource.read_text("datahub.ingestion.source.powerbi", "powerbi-lexical-grammar.rule") + lark_parser = Lark(grammar, start="let_expression", regex=True) + print(lark_parser.parse(expression).pretty()) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule new file mode 100644 index 0000000000000..4ca73fb4625c8 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule @@ -0,0 +1,551 @@ +lexical_unit: lexical_elements? + +lexical_elements: lexical_element + | lexical_elements? + +lexical_element: whitespace + | token comment + +whitespace: WS + | new_line_character + +new_line_character: CR + | LF + | NEWLINE + +comment: single_line_comment + | delimited_comment + + +single_line_comment: single_line_comment_characters? + +single_line_comment_characters: single_line_comment_character + | single_line_comment_characters? + +single_line_comment_character: CPP_COMMENT + +delimited_comment: C_COMMENT + +asterisks: "*" + | asterisks? + +token: identifier + | keyword + | literal + | operator_or_punctuator + +character_escape_sequence: "#(" escape_sequence_list ")" + +escape_sequence_list: single_escape_sequence + | escape_sequence_list "," single_escape_sequence + +single_escape_sequence: long_unicode_escape_sequence + | short_unicode_escape_sequence + | control_character_escape_sequence + | escape_escape + +long_unicode_escape_sequence: hex_digit hex_digit hex_digit hex_digit hex_digit hex_digit hex_digit hex_digit + +short_unicode_escape_sequence: hex_digit hex_digit hex_digit hex_digit + +control_character_escape_sequence: control_character + +control_character: CR + | LF + | /\t/ + +escape_escape: "#" + +literal: logical_literal + | number_literal + | text_literal + | null_literal + | verbatim_literal + +logical_literal: "true" + | "false" + +number_literal: decimal_number_literal + | hexadecimal_number_literal + +decimal_digits: decimal_digit + | decimal_digits? + +decimal_digit: DIGIT + +hexadecimal_number_literal: "0x" hex_digits + | "0X" hex_digits + +hex_digits: hex_digit + | hex_digits? + +hex_digit: HEXDIGIT + +decimal_number_literal: decimal_digits "." decimal_digits exponent_part? + | decimal_digits exponent_part? + | decimal_digits exponent_part? + +exponent_part: "e" sign? decimal_digits + | "E" sign? decimal_digits + +sign: ["+"|"-"] + +text_literal: ESCAPED_STRING + +text_literal_characters: text_literal_character + | text_literal_characters? + +text_literal_character: single_text_character + | character_escape_sequence + | double_quote_escape_sequence + +single_text_character: /./ + | /[^#]/ + +double_quote_escape_sequence: "\"\"" + +null_literal: "null" + +verbatim_literal: "#!\"" text_literal_characters? 
"\"" + +identifier: regular_identifier + | quoted_identifier + +regular_identifier: available_identifier + | available_identifier dot_character regular_identifier + +available_identifier: keyword_or_identifier + +keyword_or_identifier: letter_character + | underscore_character + | identifier_start_character identifier_part_characters + +identifier_start_character: letter_character + | underscore_character + +identifier_part_characters: identifier_part_character identifier_part_characters? + +identifier_part_character: letter_character + | decimal_digit_character + | underscore_character + | connecting_character + | combining_character + | formatting_character + +generalized_identifier: generalized_identifier_part + | generalized_identifier WS_INLINE generalized_identifier_part + +generalized_identifier_part: generalized_identifier_segment + | decimal_digit_character generalized_identifier_segment + +generalized_identifier_segment: keyword_or_identifier + | keyword_or_identifier dot_character keyword_or_identifier + +dot_character: "." + +underscore_character: "_" + +letter_character: /[_\-\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]+/ + +combining_character: /[_\p{Mn}\p{Mc}]+/ + +decimal_digit_character: /[\p{Nd}]+/ + +connecting_character: /[\p{Pc}]+/ + +formatting_character: /[\p{Cf}]+/ + +quoted_identifier: "#" "\"" text_literal_characters? "\"" + +keyword: "and" + | "as" + | "each" + | "else" + | "error" + | "false" + | "if" + | "in" + | "is" + | "let" + | "meta" + | "not" + | "null" + | "or" + | "otherwise" + | "section" + | "shared" + | "then" + | "true" + | "try" + | "type" + | "#binary" + | "#date" + | "#datetime" + | "#datetimezone" + | "#duration" + | "#infinity" + | "#nan" + | "#sections" + | "#shared" + | "#table" + | "#time" + + +operator_or_punctuator: "," + | ";" + | "=" + | "<" + | "<=" + | ">" + | ">=" + | "<>" + | "+" + | "_" + | "*" + | "/" + | "&" + | "(" + | ")" + | "[" + | "]" + | "{" + | "}" + | "@" + | "?" + | "??" + | "=>" + | ".." + | "..." + +document: section_document + | expression_document + +section_document: section + +section: literal_attributes? + | section + | section_name ";" section_members? + +section_name: identifier + +section_members: section_member + | section_members? + +section_member: literal_attributes? + | "shared"? + | section_member_name "=" expression ";" + +section_member_name: identifier + +expression_document: expression + +expression: logical_or_expression + | each_expression + | function_expression + | let_expression + | if_expression + | error_raising_expression + | error_handling_expression + +logical_or_expression: logical_and_expression + | logical_and_expression "or" logical_or_expression + +logical_and_expression: is_expression + | logical_and_expression "and" is_expression + +is_expression: as_expression + | is_expression "is" nullable_primitive_type + +nullable_primitive_type: "nullable"? 
primitive_type + +as_expression: equality_expression + | as_expression "as" nullable_primitive_type + +equality_expression: relational_expression + | relational_expression "=" equality_expression + | relational_expression "<>" equality_expression + +relational_expression: additive_expression + | additive_expression "<" relational_expression + | additive_expression ">" relational_expression + | additive_expression "<=" relational_expression + | additive_expression ">=" relational_expression + +additive_expression: multiplicative_expression + | multiplicative_expression "+" additive_expression + | multiplicative_expression "_" additive_expression + | multiplicative_expression "&" "_" additive_expression + +multiplicative_expression: metadata_expression + | metadata_expression "*" multiplicative_expression + | metadata_expression "/" multiplicative_expression + +metadata_expression: unary_expression + | unary_expression + | "meta" + | unary_expression + +unary_expression: type_expression + | "+" unary_expression + | "_" unary_expression + | "not" unary_expression + +primary_expression: literal_expression + | list_expression + | record_expression + | identifier_expression + | section_access_expression + | parenthesized_expression + | field_access_expression + | item_access_expression + | invoke_expression + | not_implemented_expression + +literal_expression: literal + +identifier_expression: identifier_reference + +identifier_reference: exclusive_identifier_reference + | inclusive_identifier_reference + +exclusive_identifier_reference: identifier + +inclusive_identifier_reference: "@" identifier + +section_access_expression: identifier "!" identifier + +parenthesized_expression: "(" WS_INLINE? expression WS_INLINE? ")" + +not_implemented_expression: "..." + +invoke_expression: primary_expression WS_INLINE? "(" WS_INLINE? argument_list? WS_INLINE? ")" + +argument_list: expression + | expression "," argument_list + | "\"" identifier "\"" + | "\"" identifier "\"" "," argument_list + + +list_expression: "{" item_list? "}" + +item_list: item + | item "," item_list + +item: expression + | expression ".." expression + +record_expression: "[" field_list? "]" + +field_list: field + | field "," field_list + +field: field_name WS_INLINE? "=" WS_INLINE? expression + +field_name: generalized_identifier + | quoted_identifier + +item_access_expression: item_selection + | optional_item_selection + +item_selection: primary_expression "{" item_selector "}" + +optional_item_selection: primary_expression "{" item_selector "}" "?" + +item_selector: expression + +field_access_expression: field_selection + | implicit_target_field_selection + | projection + | implicit_target_projection + +field_selection: primary_expression field_selector + +field_selector: required_field_selector + | optional_field_selector + +required_field_selector: "[" WS_INLINE? field_name WS_INLINE? "]" + +optional_field_selector: "[" WS_INLINE? field_name WS_INLINE? "]" "?" + +implicit_target_field_selection: field_selector + +projection: primary_expression required_projection + | primary_expression optional_projection + +required_projection: "[" WS_INLINE? required_selector_list WS_INLINE? "]" + +optional_projection: "[" WS_INLINE? required_selector_list WS_INLINE? "]" "?" + +required_selector_list: required_field_selector + | required_field_selector "," required_selector_list + +implicit_target_projection: required_projection + | optional_projection + +function_expression: "(" WS_INLINE? parameter_list? WS_INLINE? ")" WS_INLINE return_type? 
"=>" function_body + +function_body: expression + +parameter_list: fixed_parameter_list + | fixed_parameter_list "," optional_parameter_list + | optional_parameter_list + +fixed_parameter_list: parameter + | parameter "," fixed_parameter_list + +parameter: parameter_name parameter_type? + +parameter_name: identifier + +parameter_type: assertion + +return_type: assertion + +assertion: "as" WS_INLINE nullable_primitive_type + +optional_parameter_list: optional_parameter + | optional_parameter "," optional_parameter_list + +optional_parameter: "optional" WS_INLINE parameter + +each_expression: "each" WS_INLINE each_expression_body + +each_expression_body: function_body + +let_expression: "let" NEWLINE WS_INLINE? variable_list WS_INLINE? NEWLINE? in_expression + +in_expression: "in" NEWLINE? WS_INLINE NEWLINE? expression + +variable_list: variable + | variable NEWLINE? WS_INLINE? "," NEWLINE? WS_INLINE? variable_list + +variable: variable_name WS_INLINE? "=" WS_INLINE? expression + +variable_name: identifier + +if_expression: "if" WS_INLINE if_condition WS_INLINE NEWLINE? "then" WS_INLINE true_expression WS_INLINE "else" WS_INLINE false_expression + +if_condition: expression + +true_expression: expression + +false_expression: expression + +type_expression: primary_expression + | "type" WS_INLINE primary_type + +type: parenthesized_expression + | primary_type + +primary_type: primitive_type + | record_type + | list_type + | function_type + | table_type + | nullable_type + +primitive_type: "any" + | "anynonnull" + | "binary" + | "date" + | "datetime" + | "datetimezone" + | "duration" + | "function" + | "list" + | "logical" + | "none" + | "null" + | "number" + | "record" + | "table" + | "text" + | "time" + | "type" + +record_type: "[" WS_INLINE? open_record_marker WS_INLINE? "]" + | "[" WS_INLINE? field_specification_list? WS_INLINE "]" + | "[" WS_INLINE? field_specification_list WS_INLINE "," WS_INLINE? open_record_marker WS_INLINE? "]" + +field_specification_list: field_specification + | field_specification WS_INLINE? "," WS_INLINE? field_specification_list + +field_specification: "optional"? WS_INLINE? field_name WS_INLINE field_type_specification? + +field_type_specification: "=" WS_INLINE field_type + +field_type: type + +open_record_marker: "..." + +list_type: "{" WS_INLINE? item_type WS_INLINE? "}" + +item_type: type + +function_type: "function" WS_INLINE? "(" WS_INLINE? parameter_specification_list? WS_INLINE? ")" WS_INLINE? return_type + +parameter_specification_list: required_parameter_specification_list + | required_parameter_specification_list WS_INLINE? "," WS_INLINE? optional_parameter_specification_list + | optional_parameter_specification_list + +required_parameter_specification_list: required_parameter_specification + | required_parameter_specification WS_INLINE? "," WS_INLINE? required_parameter_specification_list + +required_parameter_specification: parameter_specification + +optional_parameter_specification_list: optional_parameter_specification + | optional_parameter_specification WS_INLINE? "," WS_INLINE? optional_parameter_specification_list + +optional_parameter_specification: "optional" parameter_specification + +parameter_specification: parameter_name WS_INLINE parameter_type + +table_type: "table" WS_INLINE row_type + +row_type: "[" WS_INLINE? field_specification_list? WS_INLINE? 
"]" + +nullable_type: "nullable" WS_INLINE type + +error_raising_expression: "error" WS_INLINE expression "_" + +error_handling_expression: "try" WS_INLINE protected_expression WS_INLINE otherwise_clause? + +protected_expression: expression + +otherwise_clause: "otherwise" WS_INLINE default_expression + +default_expression: expression + +literal_attributes: record_literal + +record_literal: "[" WS_INLINE? literal_field_list? WS_INLINE? "]" + +literal_field_list: literal_field + | literal_field WS_INLINE? "," WS_INLINE? literal_field_list + +literal_field: field_name WS_INLINE? "=" WS_INLINE? any_literal + +list_literal: "{" WS_INLINE? literal_item_list? WS_INLINE? "}" + +literal_item_list: any_literal + | any_literal WS_INLINE? "," WS_INLINE? literal_item_list + +any_literal: record_literal + | list_literal + | logical_literal + | number_literal + | text_literal + | null_literal + + +%import common.WORD +%import common.WS_INLINE +%import common.CPP_COMMENT +%import common.C_COMMENT +%import common.WS +%import common.NEWLINE +%import common.HEXDIGIT +%import common.DIGIT +%import common.LF +%import common.CR +%import common.ESCAPED_STRING \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py new file mode 100644 index 0000000000000..9642d9a849dd9 --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -0,0 +1,5 @@ +from datahub.ingestion.source.powerbi import m_parser + +def test_parse_m_query(): + expression: str = "let\n Source = Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table" + m_parser.parse_expression(expression) From 24e0ba9fb142c7284ff700b4285e4903e98b5e46 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 16 Nov 2022 16:07:25 +0530 Subject: [PATCH 04/53] 12 expression test case --- .../ingestion/source/powerbi/m_parser.py | 2 +- .../integration/powerbi/test_m_parser.py | 58 ++++++++++++++++++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index 7ebfd9a2e3966..459201b55d853 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -70,5 +70,5 @@ def __init__(self, tokens: List[BaseToken]): def parse_expression(expression: str) -> List[Step]: grammar: str = pkg_resource.read_text("datahub.ingestion.source.powerbi", "powerbi-lexical-grammar.rule") - lark_parser = Lark(grammar, start="let_expression", regex=True) + lark_parser = Lark(grammar, start="let_expression", parser="lalr", regex=True) print(lark_parser.parse(expression).pretty()) diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 9642d9a849dd9..d413cd0c3d7e5 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -1,5 +1,61 @@ from datahub.ingestion.source.powerbi import m_parser -def test_parse_m_query(): + +def 
test_parse_m_query1(): expression: str = "let\n Source = Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table" m_parser.parse_expression(expression) + + +def test_parse_m_query2(): + expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"" + m_parser.parse_expression(expression) + + +# def test_parse_m_query3(): +# expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"Has PS Software Quota?\", each if [TIER] = \"Expansion (Medium)\" then \"Yes\" else if [TIER] = \"Acquisition\" then \"Yes\" else \"No\")\nin\n #\"Added Conditional Column\"" +# m_parser.parse_expression(expression) +# +# +# def test_parse_m_query4(): +# expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *#(lf),concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,'-',''))), MONTHID) as AGENT_KEY#(lf),concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY#(lf)#(lf)from V_OIP_ENT_2022\"]),\n #\"Added Custom\" = Table.AddColumn(Source, \"OIP in $(*$350)\", each 
[SALES_INVOICE_AMOUNT] * 350),\n #\"Changed Type\" = Table.TransformColumnTypes(#\"Added Custom\",{{\"OIP in $(*$350)\", type number}})\nin\n #\"Changed Type\"" +# m_parser.parse_expression(expression) +# +# +# def test_parse_m_query5(): +# expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"Select *,#(lf)#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,'-',''))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS CD_AGENT_KEY#(lf)#(lf)from V_INVOICE_BOOKING_2022\"]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"CLIENT_ID\", Int64.Type}}),\n #\"Added Conditional Column\" = Table.AddColumn(#\"Changed Type\", \"PS Software (One-Off)\", each if Text.Contains([REVENUE_TYPE], \"Software\") then [Inv_Amt] else if Text.Contains([REVENUE_TYPE], \"Tax Seminar\") then [Inv_Amt] else 0),\n #\"Filtered Rows\" = Table.SelectRows(#\"Added Conditional Column\", each true),\n #\"Duplicated Column\" = Table.DuplicateColumn(#\"Filtered Rows\", \"CLIENT_ID\", \"CLIENT_ID - Copy\"),\n #\"Changed Type1\" = Table.TransformColumnTypes(#\"Duplicated Column\",{{\"CLIENT_ID - Copy\", type text}}),\n #\"Renamed Columns\" = Table.RenameColumns(#\"Changed Type1\",{{\"CLIENT_ID - Copy\", \"CLIENT_ID for Filter\"}})\nin\n #\"Renamed Columns\"" +# m_parser.parse_expression(expression) +# +# +# def test_parse_m_query6(): +# expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"SELECT *,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), #(lf)LEFT(CAST(MONTH_DATE AS DATE),4)+LEFT(RIGHT(CAST(MONTH_DATE AS DATE),5),2)) AS AGENT_KEY#(lf)#(lf)FROM dbo.V_ARR_ADDS\"]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"MONTH_DATE\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([MONTH_DATE]))\nin\n #\"Added Custom\"" +# m_parser.parse_expression(expression) +# +# +# def test_parse_m_query7(): +# expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS CLIENT_DIRECTOR,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'#(lf)AND TIER = 'Client Director'\", null, [EnableFolding=true])\nin\n Source" +# m_parser.parse_expression(expression) +# +# +# def test_parse_m_query8(): +# expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", 
each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"" +# m_parser.parse_expression(expression) +# +# +# def test_parse_m_query9(): +# expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select#(lf)CLIENT_ID,#(lf)PARTNER_ACCOUNT_NAME,#(lf)CM_CLOSING_MNTH_COUNTRY,#(lf)MONTH_WID,#(lf)PS_DELETES,#(lf)CLIENT_MANAGER_CLOSING_MONTH,#(lf)SME_DELETES,#(lf)TPV_AMV_OPENING,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_TPV_LEADERBOARD\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"MONTH_WID\", type text}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"MONTH_DATE\", each Date.FromText(\nText.Range([MONTH_WID], 0,4) & \"-\" &\nText.Range([MONTH_WID], 4,2)\n)),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom\", \"Month\", each Date.Month([MONTH_DATE])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom2\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"" +# m_parser.parse_expression(expression) +# +# +# def test_parse_m_query10(): +# expression: str = "let\n Source = Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"OPERATIONS_ANALYTICS_WAREHOUSE_PROD\",[Role=\"OPERATIONS_ANALYTICS_MEMBER_AD\"]),\n OPERATIONS_ANALYTICS_Database = Source{[Name=\"OPERATIONS_ANALYTICS\",Kind=\"Database\"]}[Data],\n TEST_Schema = OPERATIONS_ANALYTICS_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n LZ_MIGRATION_DOWNLOAD_View = TEST_Schema{[Name=\"LZ_MIGRATION_DOWNLOAD\",Kind=\"View\"]}[Data],\n #\"Changed Type\" = Table.TransformColumnTypes(LZ_MIGRATION_DOWNLOAD_View,{{\"MIGRATION_MONTH_ID\", type text}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Migration Month\", each Date.FromText(\nText.Range([MIGRATION_MONTH_ID], 0,4) & \"-\" & \nText.Range([MIGRATION_MONTH_ID], 4,2) \n)),\n #\"Changed Type1\" = Table.TransformColumnTypes(#\"Added Custom\",{{\"Migration Month\", type date}})\nin\n #\"Changed Type1\"" +# m_parser.parse_expression(expression) +# +# +# def test_parse_m_query11(): +# expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select *,#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Industries'#(lf)and TARGET_TEAM = 'Enterprise'\", null, 
[EnableFolding=true])\nin\n Source" +# m_parser.parse_expression(expression) +# +# +# def test_parse_m_query12(): +# expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,'-',''))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = 'Software' and (NOT(PRODUCT in ('ADV', 'Adv') and left(ACCOUNT_ID,2)='10') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = 'Manual Adjustment') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN ('Recurring','0') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"Services\", each if [CLASS] = \"Services\" then [INVOICE_AMOUNT] else 0),\n #\"Added Custom\" = Table.AddColumn(#\"Added Conditional Column\", \"Advanced New Sites\", each if [PRODUCT] = \"ADV\"\nor [PRODUCT] = \"Adv\"\nthen [NEW_SITE]\nelse 0)\nin\n #\"Added Custom\"" +# m_parser.parse_expression(expression) From c539b086fbc7128ab525632b7915a9693eff4cba Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Thu, 17 Nov 2022 15:40:14 +0530 Subject: [PATCH 05/53] 12 M query expression parsed --- .../ingestion/source/powerbi/m_parser.py | 2 +- .../powerbi/powerbi-lexical-grammar.rule | 56 ++++++---- .../integration/powerbi/test_m_parser.py | 104 +++++++++--------- 3 files changed, 89 insertions(+), 73 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index 459201b55d853..73b24b176c009 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -70,5 +70,5 @@ def __init__(self, tokens: List[BaseToken]): def parse_expression(expression: str) -> List[Step]: grammar: str = pkg_resource.read_text("datahub.ingestion.source.powerbi", "powerbi-lexical-grammar.rule") - lark_parser = Lark(grammar, start="let_expression", parser="lalr", regex=True) + lark_parser = Lark(grammar, start="let_expression", regex=True, debug=True) print(lark_parser.parse(expression).pretty()) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule index 4ca73fb4625c8..2f84d2cf6365f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule @@ -71,7 +71,7 @@ number_literal: decimal_number_literal decimal_digits: decimal_digit | decimal_digits? -decimal_digit: DIGIT +decimal_digit: /\d+/ hexadecimal_number_literal: "0x" hex_digits | "0X" hex_digits @@ -81,7 +81,8 @@ hex_digits: hex_digit hex_digit: HEXDIGIT -decimal_number_literal: decimal_digits "." decimal_digits exponent_part? +decimal_number_literal: decimal_digits + | decimal_digits "." decimal_digits exponent_part? | decimal_digits exponent_part? | decimal_digits exponent_part? 
@@ -100,7 +101,7 @@ text_literal_character: single_text_character | double_quote_escape_sequence single_text_character: /./ - | /[^#]/ + | /[^#]/ double_quote_escape_sequence: "\"\"" @@ -155,7 +156,7 @@ connecting_character: /[\p{Pc}]+/ formatting_character: /[\p{Cf}]+/ -quoted_identifier: "#" "\"" text_literal_characters? "\"" +quoted_identifier: "#" ESCAPED_STRING keyword: "and" | "as" @@ -250,20 +251,25 @@ expression: logical_or_expression logical_or_expression: logical_and_expression | logical_and_expression "or" logical_or_expression -logical_and_expression: is_expression - | logical_and_expression "and" is_expression +logical_and_expression: WS_INLINE? NEWLINE? is_expression WS_INLINE? NEWLINE? + | WS_INLINE? + | NEWLINE? + | logical_and_expression WS_INLINE? "and" WS_INLINE? is_expression is_expression: as_expression + | WS_INLINE? + | NEWLINE? | is_expression "is" nullable_primitive_type nullable_primitive_type: "nullable"? primitive_type -as_expression: equality_expression +as_expression: WS_INLINE? equality_expression | as_expression "as" nullable_primitive_type + | WS_INLINE? multiplicative_expression -equality_expression: relational_expression - | relational_expression "=" equality_expression - | relational_expression "<>" equality_expression +equality_expression: WS_INLINE? relational_expression + | WS_INLINE? relational_expression WS_INLINE? "=" WS_INLINE? equality_expression + | relational_expression WS_INLINE? "<>" WS_INLINE? equality_expression relational_expression: additive_expression | additive_expression "<" relational_expression @@ -273,11 +279,12 @@ relational_expression: additive_expression additive_expression: multiplicative_expression | multiplicative_expression "+" additive_expression - | multiplicative_expression "_" additive_expression - | multiplicative_expression "&" "_" additive_expression + | multiplicative_expression WS_INLINE? "_" WS_INLINE? additive_expression + | multiplicative_expression WS_INLINE? NEWLINE? WS_INLINE? "&" WS_INLINE? NEWLINE? WS_INLINE? additive_expression -multiplicative_expression: metadata_expression - | metadata_expression "*" multiplicative_expression + +multiplicative_expression: WS_INLINE? metadata_expression + | metadata_expression WS_INLINE? "*" WS_INLINE? multiplicative_expression | metadata_expression "/" multiplicative_expression metadata_expression: unary_expression @@ -318,12 +325,19 @@ parenthesized_expression: "(" WS_INLINE? expression WS_INLINE? ")" not_implemented_expression: "..." -invoke_expression: primary_expression WS_INLINE? "(" WS_INLINE? argument_list? WS_INLINE? ")" +invoke_expression: "#"? primary_expression "(" NEWLINE? argument_list? NEWLINE? ")" -argument_list: expression - | expression "," argument_list +argument_list: WS_INLINE? expression + | WS_INLINE? expression WS_INLINE? "," WS_INLINE? argument_list | "\"" identifier "\"" | "\"" identifier "\"" "," argument_list + | WS_INLINE + | WS_INLINE? ESCAPED_STRING + | WS_INLINE? ESCAPED_STRING "," argument_list + | WS_INLINE? record_literal + | WS_INLINE? record_literal "," argument_list + | WS_INLINE? null_literal + | WS_INLINE? null_literal "," argument_list list_expression: "{" item_list? "}" @@ -423,11 +437,12 @@ variable: variable_name WS_INLINE? "=" WS_INLINE? expression variable_name: identifier -if_expression: "if" WS_INLINE if_condition WS_INLINE NEWLINE? "then" WS_INLINE true_expression WS_INLINE "else" WS_INLINE false_expression +if_expression: "if" WS_INLINE if_condition WS_INLINE? NEWLINE? WS_INLINE? "then" WS_INLINE? NEWLINE? 
true_expression WS_INLINE? NEWLINE? "else" WS_INLINE false_expression if_condition: expression -true_expression: expression +true_expression: NEWLINE? WS_INLINE? expression + | multiplicative_expression false_expression: expression @@ -523,7 +538,8 @@ record_literal: "[" WS_INLINE? literal_field_list? WS_INLINE? "]" literal_field_list: literal_field | literal_field WS_INLINE? "," WS_INLINE? literal_field_list -literal_field: field_name WS_INLINE? "=" WS_INLINE? any_literal +literal_field: field_name WS_INLINE? "=" WS_INLINE? any_literal + | field_name WS_INLINE? "=" WS_INLINE? invoke_expression list_literal: "{" WS_INLINE? literal_item_list? WS_INLINE? "}" diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index d413cd0c3d7e5..62d6d6d02043f 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -7,55 +7,55 @@ def test_parse_m_query1(): def test_parse_m_query2(): - expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"" - m_parser.parse_expression(expression) - - -# def test_parse_m_query3(): -# expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"Has PS Software Quota?\", each if [TIER] = \"Expansion (Medium)\" then 
\"Yes\" else if [TIER] = \"Acquisition\" then \"Yes\" else \"No\")\nin\n #\"Added Conditional Column\"" -# m_parser.parse_expression(expression) -# -# -# def test_parse_m_query4(): -# expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *#(lf),concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,'-',''))), MONTHID) as AGENT_KEY#(lf),concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY#(lf)#(lf)from V_OIP_ENT_2022\"]),\n #\"Added Custom\" = Table.AddColumn(Source, \"OIP in $(*$350)\", each [SALES_INVOICE_AMOUNT] * 350),\n #\"Changed Type\" = Table.TransformColumnTypes(#\"Added Custom\",{{\"OIP in $(*$350)\", type number}})\nin\n #\"Changed Type\"" -# m_parser.parse_expression(expression) -# -# -# def test_parse_m_query5(): -# expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"Select *,#(lf)#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,'-',''))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS CD_AGENT_KEY#(lf)#(lf)from V_INVOICE_BOOKING_2022\"]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"CLIENT_ID\", Int64.Type}}),\n #\"Added Conditional Column\" = Table.AddColumn(#\"Changed Type\", \"PS Software (One-Off)\", each if Text.Contains([REVENUE_TYPE], \"Software\") then [Inv_Amt] else if Text.Contains([REVENUE_TYPE], \"Tax Seminar\") then [Inv_Amt] else 0),\n #\"Filtered Rows\" = Table.SelectRows(#\"Added Conditional Column\", each true),\n #\"Duplicated Column\" = Table.DuplicateColumn(#\"Filtered Rows\", \"CLIENT_ID\", \"CLIENT_ID - Copy\"),\n #\"Changed Type1\" = Table.TransformColumnTypes(#\"Duplicated Column\",{{\"CLIENT_ID - Copy\", type text}}),\n #\"Renamed Columns\" = Table.RenameColumns(#\"Changed Type1\",{{\"CLIENT_ID - Copy\", \"CLIENT_ID for Filter\"}})\nin\n #\"Renamed Columns\"" -# m_parser.parse_expression(expression) -# -# -# def test_parse_m_query6(): -# expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"SELECT *,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), #(lf)LEFT(CAST(MONTH_DATE AS DATE),4)+LEFT(RIGHT(CAST(MONTH_DATE AS DATE),5),2)) AS AGENT_KEY#(lf)#(lf)FROM dbo.V_ARR_ADDS\"]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"MONTH_DATE\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([MONTH_DATE]))\nin\n #\"Added Custom\"" -# m_parser.parse_expression(expression) -# -# -# def test_parse_m_query7(): -# expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS CLIENT_DIRECTOR,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'#(lf)AND TIER = 'Client Director'\", null, [EnableFolding=true])\nin\n Source" -# 
m_parser.parse_expression(expression) -# -# -# def test_parse_m_query8(): -# expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"" -# m_parser.parse_expression(expression) -# -# -# def test_parse_m_query9(): -# expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select#(lf)CLIENT_ID,#(lf)PARTNER_ACCOUNT_NAME,#(lf)CM_CLOSING_MNTH_COUNTRY,#(lf)MONTH_WID,#(lf)PS_DELETES,#(lf)CLIENT_MANAGER_CLOSING_MONTH,#(lf)SME_DELETES,#(lf)TPV_AMV_OPENING,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_TPV_LEADERBOARD\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"MONTH_WID\", type text}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"MONTH_DATE\", each Date.FromText(\nText.Range([MONTH_WID], 0,4) & \"-\" &\nText.Range([MONTH_WID], 4,2)\n)),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom\", \"Month\", each Date.Month([MONTH_DATE])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom2\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"" -# m_parser.parse_expression(expression) -# -# -# def test_parse_m_query10(): -# expression: str = "let\n Source = Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"OPERATIONS_ANALYTICS_WAREHOUSE_PROD\",[Role=\"OPERATIONS_ANALYTICS_MEMBER_AD\"]),\n OPERATIONS_ANALYTICS_Database = Source{[Name=\"OPERATIONS_ANALYTICS\",Kind=\"Database\"]}[Data],\n TEST_Schema = OPERATIONS_ANALYTICS_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n LZ_MIGRATION_DOWNLOAD_View = TEST_Schema{[Name=\"LZ_MIGRATION_DOWNLOAD\",Kind=\"View\"]}[Data],\n #\"Changed Type\" = Table.TransformColumnTypes(LZ_MIGRATION_DOWNLOAD_View,{{\"MIGRATION_MONTH_ID\", type text}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Migration Month\", each Date.FromText(\nText.Range([MIGRATION_MONTH_ID], 0,4) & \"-\" & \nText.Range([MIGRATION_MONTH_ID], 4,2) \n)),\n #\"Changed Type1\" = Table.TransformColumnTypes(#\"Added Custom\",{{\"Migration Month\", type date}})\nin\n #\"Changed Type1\"" -# m_parser.parse_expression(expression) -# -# -# def 
test_parse_m_query11(): -# expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select *,#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Industries'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true])\nin\n Source" -# m_parser.parse_expression(expression) -# -# -# def test_parse_m_query12(): -# expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,'-',''))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = 'Software' and (NOT(PRODUCT in ('ADV', 'Adv') and left(ACCOUNT_ID,2)='10') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = 'Manual Adjustment') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN ('Recurring','0') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"Services\", each if [CLASS] = \"Services\" then [INVOICE_AMOUNT] else 0),\n #\"Added Custom\" = Table.AddColumn(#\"Added Conditional Column\", \"Advanced New Sites\", each if [PRODUCT] = \"ADV\"\nor [PRODUCT] = \"Adv\"\nthen [NEW_SITE]\nelse 0)\nin\n #\"Added Custom\"" -# m_parser.parse_expression(expression) + expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"ADDed Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"" + m_parser.parse_expression(expression) + + +def test_parse_m_query3(): + expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS 
Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"Has PS Software Quota?\", each if [TIER] = \"Expansion (Medium)\" then \"Yes\" else if [TIER] = \"Acquisition\" then \"Yes\" else \"No\")\nin\n #\"Added Conditional Column\"" + m_parser.parse_expression(expression) + + +def test_parse_m_query4(): + expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *#(lf),concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,'-',''))), MONTHID) as AGENT_KEY#(lf),concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY#(lf)#(lf)from V_OIP_ENT_2022\"]),\n #\"Added Custom\" = Table.AddColumn(Source, \"OIP in $(*$350)\", each [SALES_INVOICE_AMOUNT] * 350),\n #\"Changed Type\" = Table.TransformColumnTypes(#\"Added Custom\",{{\"OIP in $(*$350)\", type number}})\nin\n #\"Changed Type\"" + m_parser.parse_expression(expression) + + +def test_parse_m_query5(): + expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"Select *,#(lf)#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,'-',''))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS CD_AGENT_KEY#(lf)#(lf)from V_INVOICE_BOOKING_2022\"]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"CLIENT_ID\", Int64.Type}}),\n #\"Added Conditional Column\" = Table.AddColumn(#\"Changed Type\", \"PS Software (One-Off)\", each if Text.Contains([REVENUE_TYPE], \"Software\") then [Inv_Amt] else if Text.Contains([REVENUE_TYPE], \"Tax Seminar\") then [Inv_Amt] else 0),\n #\"Filtered Rows\" = Table.SelectRows(#\"Added Conditional Column\", each true),\n #\"Duplicated Column\" = Table.DuplicateColumn(#\"Filtered Rows\", \"CLIENT_ID\", \"CLIENT_ID - Copy\"),\n #\"Changed Type1\" = Table.TransformColumnTypes(#\"Duplicated Column\",{{\"CLIENT_ID - Copy\", type text}}),\n #\"Renamed Columns\" = Table.RenameColumns(#\"Changed Type1\",{{\"CLIENT_ID - Copy\", \"CLIENT_ID for Filter\"}})\nin\n #\"Renamed Columns\"" + m_parser.parse_expression(expression) + + +def test_parse_m_query6(): + expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"SELECT *,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), #(lf)LEFT(CAST(MONTH_DATE AS DATE),4)+LEFT(RIGHT(CAST(MONTH_DATE AS DATE),5),2)) AS AGENT_KEY#(lf)#(lf)FROM dbo.V_ARR_ADDS\"]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"MONTH_DATE\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([MONTH_DATE]))\nin\n #\"Added Custom\"" + m_parser.parse_expression(expression) + + +def test_parse_m_query7(): + expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select 
#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS CLIENT_DIRECTOR,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'#(lf)AND TIER = 'Client Director'\", null, [EnableFolding=true])\nin\n Source" + m_parser.parse_expression(expression) + + +def test_parse_m_query8(): + expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"" + m_parser.parse_expression(expression) + + +def test_parse_m_query9(): + expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select#(lf)CLIENT_ID,#(lf)PARTNER_ACCOUNT_NAME,#(lf)CM_CLOSING_MNTH_COUNTRY,#(lf)MONTH_WID,#(lf)PS_DELETES,#(lf)CLIENT_MANAGER_CLOSING_MONTH,#(lf)SME_DELETES,#(lf)TPV_AMV_OPENING,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_TPV_LEADERBOARD\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"MONTH_WID\", type text}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"MONTH_DATE\", each Date.FromText(\nText.Range([MONTH_WID], 0,4) & \"-\" &\nText.Range([MONTH_WID], 4,2)\n)),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom\", \"Month\", each Date.Month([MONTH_DATE])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom2\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"" + m_parser.parse_expression(expression) + + +def test_parse_m_query10(): + expression: str = "let\n Source = Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"OPERATIONS_ANALYTICS_WAREHOUSE_PROD\",[Role=\"OPERATIONS_ANALYTICS_MEMBER_AD\"]),\n OPERATIONS_ANALYTICS_Database = Source{[Name=\"OPERATIONS_ANALYTICS\",Kind=\"Database\"]}[Data],\n TEST_Schema = 
OPERATIONS_ANALYTICS_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n LZ_MIGRATION_DOWNLOAD_View = TEST_Schema{[Name=\"LZ_MIGRATION_DOWNLOAD\",Kind=\"View\"]}[Data],\n #\"Changed Type\" = Table.TransformColumnTypes(LZ_MIGRATION_DOWNLOAD_View,{{\"MIGRATION_MONTH_ID\", type text}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Migration Month\", each Date.FromText(\nText.Range([MIGRATION_MONTH_ID], 0,4) & \"-\" & \nText.Range([MIGRATION_MONTH_ID], 4,2) \n)),\n #\"Changed Type1\" = Table.TransformColumnTypes(#\"Added Custom\",{{\"Migration Month\", type date}})\nin\n #\"Changed Type1\"" + m_parser.parse_expression(expression) + + +def test_parse_m_query11(): + expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select *,#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Industries'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true])\nin\n Source" + m_parser.parse_expression(expression) + + +def test_parse_m_query12(): + expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,'-',''))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = 'Software' and (NOT(PRODUCT in ('ADV', 'Adv') and left(ACCOUNT_ID,2)='10') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = 'Manual Adjustment') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN ('Recurring','0') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"Services\", each if [CLASS] = \"Services\" then [INVOICE_AMOUNT] else 0),\n #\"Added Custom\" = Table.AddColumn(#\"Added Conditional Column\", \"Advanced New Sites\", each if [PRODUCT] = \"ADV\"\nor [PRODUCT] = \"Adv\"\nthen [NEW_SITE]\nelse 0)\nin\n #\"Added Custom\"" + m_parser.parse_expression(expression) From 9651e5413af15f0e69a64eee40ea624287b3080d Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Thu, 17 Nov 2022 20:03:30 +0530 Subject: [PATCH 06/53] test cases --- .../ingestion/source/powerbi/m_parser.py | 17 ++++-- .../integration/powerbi/test_m_parser.py | 54 ++++++++++++++----- 2 files changed, 55 insertions(+), 16 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index 73b24b176c009..e29ffc40167ef 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -1,7 +1,10 @@ +import logging from abc import ABC, abstractmethod from typing import Optional, List, Dict import importlib.resources as pkg_resource -from lark import Lark +from lark import Lark, Tree + +logger = logging.getLogger(__name__) class Token(ABC): @@ -68,7 +71,13 @@ def __init__(self, tokens: List[BaseToken]): } -def parse_expression(expression: str) -> List[Step]: +def parse_expression(expression: str) -> Tree: grammar: str = 
pkg_resource.read_text("datahub.ingestion.source.powerbi", "powerbi-lexical-grammar.rule") - lark_parser = Lark(grammar, start="let_expression", regex=True, debug=True) - print(lark_parser.parse(expression).pretty()) + lark_parser = Lark(grammar, start="let_expression", regex=True) + + parse_tree: Tree = lark_parser.parse(expression) + + logger.debug("Parse Tree") + logger.debug(parse_tree.pretty()) + + return parse_tree diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 62d6d6d02043f..d3ed155ffc816 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -1,61 +1,91 @@ +from typing import List + from datahub.ingestion.source.powerbi import m_parser +from lark import Visitor, Tree + + +def get_output_dataset(root: Tree): + def get_token_list_for_any(tree: Tree, rules: List[str]): + for rule in rules: + token_list = [x for x in tree.find_data(rule)] + if len(token_list) > 0: + return token_list + + return [] + + for tree in root.find_data("in_expression"): + for child1 in get_token_list_for_any(tree, ["letter_character", "quoted_identifier"]): + return child1.children[0].value + def test_parse_m_query1(): expression: str = "let\n Source = Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table" - m_parser.parse_expression(expression) + parse_tree: Tree = m_parser.parse_expression(expression) + assert get_output_dataset(parse_tree) == "TESTTABLE_Table" def test_parse_m_query2(): expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"ADDed Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"" - m_parser.parse_expression(expression) + parse_tree: Tree = m_parser.parse_expression(expression) + assert get_output_dataset(parse_tree) == "\"Added Custom2\"" def test_parse_m_query3(): 
expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"Has PS Software Quota?\", each if [TIER] = \"Expansion (Medium)\" then \"Yes\" else if [TIER] = \"Acquisition\" then \"Yes\" else \"No\")\nin\n #\"Added Conditional Column\"" - m_parser.parse_expression(expression) + parse_tree: Tree = m_parser.parse_expression(expression) + assert get_output_dataset(parse_tree) == "\"Added Conditional Column\"" def test_parse_m_query4(): expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *#(lf),concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,'-',''))), MONTHID) as AGENT_KEY#(lf),concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY#(lf)#(lf)from V_OIP_ENT_2022\"]),\n #\"Added Custom\" = Table.AddColumn(Source, \"OIP in $(*$350)\", each [SALES_INVOICE_AMOUNT] * 350),\n #\"Changed Type\" = Table.TransformColumnTypes(#\"Added Custom\",{{\"OIP in $(*$350)\", type number}})\nin\n #\"Changed Type\"" - m_parser.parse_expression(expression) + parse_tree: Tree = m_parser.parse_expression(expression) + assert get_output_dataset(parse_tree) == "\"Changed Type\"" def test_parse_m_query5(): expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"Select *,#(lf)#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,'-',''))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS CD_AGENT_KEY#(lf)#(lf)from V_INVOICE_BOOKING_2022\"]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"CLIENT_ID\", Int64.Type}}),\n #\"Added Conditional Column\" = Table.AddColumn(#\"Changed Type\", \"PS Software (One-Off)\", each if Text.Contains([REVENUE_TYPE], \"Software\") then [Inv_Amt] else if Text.Contains([REVENUE_TYPE], \"Tax Seminar\") then [Inv_Amt] else 0),\n #\"Filtered Rows\" = Table.SelectRows(#\"Added Conditional Column\", each true),\n #\"Duplicated Column\" = Table.DuplicateColumn(#\"Filtered Rows\", \"CLIENT_ID\", \"CLIENT_ID - Copy\"),\n #\"Changed Type1\" = Table.TransformColumnTypes(#\"Duplicated Column\",{{\"CLIENT_ID - Copy\", type text}}),\n #\"Renamed Columns\" = Table.RenameColumns(#\"Changed Type1\",{{\"CLIENT_ID - Copy\", \"CLIENT_ID for Filter\"}})\nin\n #\"Renamed Columns\"" - m_parser.parse_expression(expression) + parse_tree: Tree = m_parser.parse_expression(expression) + assert get_output_dataset(parse_tree) == "\"Renamed Columns\"" def test_parse_m_query6(): expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"SELECT *,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), #(lf)LEFT(CAST(MONTH_DATE AS 
DATE),4)+LEFT(RIGHT(CAST(MONTH_DATE AS DATE),5),2)) AS AGENT_KEY#(lf)#(lf)FROM dbo.V_ARR_ADDS\"]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"MONTH_DATE\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([MONTH_DATE]))\nin\n #\"Added Custom\"" - m_parser.parse_expression(expression) + parse_tree: Tree = m_parser.parse_expression(expression) + assert get_output_dataset(parse_tree) == "\"Added Custom\"" def test_parse_m_query7(): expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS CLIENT_DIRECTOR,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'#(lf)AND TIER = 'Client Director'\", null, [EnableFolding=true])\nin\n Source" - m_parser.parse_expression(expression) + parse_tree: Tree = m_parser.parse_expression(expression) + assert get_output_dataset(parse_tree) == "Source" def test_parse_m_query8(): expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"" - m_parser.parse_expression(expression) + parse_tree: Tree = m_parser.parse_expression(expression) + assert get_output_dataset(parse_tree) == "\"Added Custom1\"" def test_parse_m_query9(): expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select#(lf)CLIENT_ID,#(lf)PARTNER_ACCOUNT_NAME,#(lf)CM_CLOSING_MNTH_COUNTRY,#(lf)MONTH_WID,#(lf)PS_DELETES,#(lf)CLIENT_MANAGER_CLOSING_MONTH,#(lf)SME_DELETES,#(lf)TPV_AMV_OPENING,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_TPV_LEADERBOARD\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"MONTH_WID\", type text}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"MONTH_DATE\", each Date.FromText(\nText.Range([MONTH_WID], 0,4) & \"-\" &\nText.Range([MONTH_WID], 4,2)\n)),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom\", \"Month\", each 
Date.Month([MONTH_DATE])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom2\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"" - m_parser.parse_expression(expression) + parse_tree: Tree = m_parser.parse_expression(expression) + assert get_output_dataset(parse_tree) == "\"Added Custom1\"" def test_parse_m_query10(): expression: str = "let\n Source = Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"OPERATIONS_ANALYTICS_WAREHOUSE_PROD\",[Role=\"OPERATIONS_ANALYTICS_MEMBER_AD\"]),\n OPERATIONS_ANALYTICS_Database = Source{[Name=\"OPERATIONS_ANALYTICS\",Kind=\"Database\"]}[Data],\n TEST_Schema = OPERATIONS_ANALYTICS_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n LZ_MIGRATION_DOWNLOAD_View = TEST_Schema{[Name=\"LZ_MIGRATION_DOWNLOAD\",Kind=\"View\"]}[Data],\n #\"Changed Type\" = Table.TransformColumnTypes(LZ_MIGRATION_DOWNLOAD_View,{{\"MIGRATION_MONTH_ID\", type text}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Migration Month\", each Date.FromText(\nText.Range([MIGRATION_MONTH_ID], 0,4) & \"-\" & \nText.Range([MIGRATION_MONTH_ID], 4,2) \n)),\n #\"Changed Type1\" = Table.TransformColumnTypes(#\"Added Custom\",{{\"Migration Month\", type date}})\nin\n #\"Changed Type1\"" - m_parser.parse_expression(expression) + parse_tree: Tree = m_parser.parse_expression(expression) + assert get_output_dataset(parse_tree) == "\"Changed Type1\"" def test_parse_m_query11(): expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select *,#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Industries'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true])\nin\n Source" - m_parser.parse_expression(expression) + parse_tree: Tree = m_parser.parse_expression(expression) + assert get_output_dataset(parse_tree) == "Source" def test_parse_m_query12(): expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,'-',''))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = 'Software' and (NOT(PRODUCT in ('ADV', 'Adv') and left(ACCOUNT_ID,2)='10') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = 'Manual Adjustment') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN ('Recurring','0') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"Services\", each if [CLASS] = \"Services\" then [INVOICE_AMOUNT] else 0),\n #\"Added Custom\" = Table.AddColumn(#\"Added Conditional Column\", \"Advanced New Sites\", each if [PRODUCT] = \"ADV\"\nor [PRODUCT] = \"Adv\"\nthen [NEW_SITE]\nelse 
0)\nin\n #\"Added Custom\"" - m_parser.parse_expression(expression) + parse_tree: Tree = m_parser.parse_expression(expression) + assert get_output_dataset(parse_tree) == "\"Added Custom\"" From 6f4d0cc3365bc94da2555d2018f5099a6f8ef12b Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 23 Nov 2022 21:11:08 +0530 Subject: [PATCH 07/53] WIP --- .../ingestion/source/powerbi/m_parser.py | 75 +---- .../ingestion/source/powerbi/powerbi.py | 276 ++++++++++-------- .../integration/powerbi/test_m_parser.py | 46 +-- .../tests/integration/powerbi/test_powerbi.py | 5 + 4 files changed, 192 insertions(+), 210 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index e29ffc40167ef..2a9f96742f59b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -1,79 +1,16 @@ -import logging -from abc import ABC, abstractmethod -from typing import Optional, List, Dict import importlib.resources as pkg_resource +import logging + from lark import Lark, Tree logger = logging.getLogger(__name__) -class Token(ABC): - @abstractmethod - def parse_raw_token(self) -> str: - pass - - -class BaseToken(Token, ABC): - _raw_token: str - _nested_tokens: Optional[List["BaseToken"]] - - def __init__(self, raw_token: str, nested_tokens: Optional[List["BaseToken"]]): - self._raw_token = raw_token - self._nested_tokens = nested_tokens - self.parse_raw_token(self._raw_token) - - -class LetToken(BaseToken): - def __init__(self, raw_token: str, nested_raw_tokens: Optional[List["Token"]]): - super().__init__(raw_token, nested_raw_tokens) - - def parse_raw_token(self) -> str: - pass - - -class TableFuncToken(BaseToken): - def __init__(self, raw_token: str, nested_raw_tokens: Optional[List["BaseToken"]]): - super().__init__(raw_token, nested_raw_tokens) - - def parse_raw_token(self) -> str: - pass - - -class DataAccessToken(BaseToken): - def __init__(self, raw_token: str, nested_raw_tokens: Optional[List["BaseToken"]]): - super().__init__(raw_token, nested_raw_tokens) - - def parse_raw_token(self) -> str: - pass - - -class OracleDataAccessToken(BaseToken): - def __init__(self, raw_token: str, nested_raw_tokens: Optional[List["BaseToken"]]): - super().__init__(raw_token, nested_raw_tokens) - - def parse_raw_token(self) -> str: - pass - - -class Step: - tokens: List[BaseToken] - def __init__(self, tokens: List[BaseToken]): - self.tokens = tokens - - -token_registry: Dict[str, BaseToken] = { - "let": LetToken, - "Table": TableFuncToken, - "PostgreSQL.Database": DataAccessToken, - "DB2.Database": DataAccessToken, - "Sql.Database": DataAccessToken, - "Oracle.Database": OracleDataAccessToken, -} - - def parse_expression(expression: str) -> Tree: - grammar: str = pkg_resource.read_text("datahub.ingestion.source.powerbi", "powerbi-lexical-grammar.rule") - lark_parser = Lark(grammar, start="let_expression", regex=True) + grammar: str = pkg_resource.read_text( + "datahub.ingestion.source.powerbi", "powerbi-lexical-grammar.rule" + ) + lark_parser = Lark(grammar, start="let_expression", regex=True) parse_tree: Tree = lark_parser.parse(expression) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index f709d20637e9e..cfe65a5a99726 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -40,8 +40,6 @@ CorpUserKeyClass, DashboardInfoClass, DashboardKeyClass, - DataPlatformInfoClass, - DatasetKeyClass, DatasetPropertiesClass, OwnerClass, OwnershipClass, @@ -104,6 +102,26 @@ class Constant: HTTP_RESPONSE_STATUS_CODE = "HttpResponseStatusCode" +@dataclass +class PowerBiDashboardSourceReport(SourceReport): + dashboards_scanned: int = 0 + charts_scanned: int = 0 + filtered_dashboards: List[str] = dataclass_field(default_factory=list) + filtered_charts: List[str] = dataclass_field(default_factory=list) + + def report_dashboards_scanned(self, count: int = 1) -> None: + self.dashboards_scanned += count + + def report_charts_scanned(self, count: int = 1) -> None: + self.charts_scanned += count + + def report_dashboards_dropped(self, model: str) -> None: + self.filtered_dashboards.append(model) + + def report_charts_dropped(self, view: str) -> None: + self.filtered_charts.append(view) + + class PowerBiAPIConfig(EnvBasedSourceConfigBase): # Organsation Identifier tenant_id: str = pydantic.Field(description="PowerBI tenant identifier") @@ -172,19 +190,9 @@ class DataSource: PowerBi """ - @dataclass - class MetaData: - """ - MetaData about DataSource - """ - - is_relational: Boolean - id: str type: str - database: Optional[str] - server: Optional[str] - metadata: Any + raw_connection_detail: Dict def __members(self): return (self.id,) @@ -200,19 +208,19 @@ def __hash__(self): # dataclasses for PowerBi Dashboard @dataclass - class Dataset: + class PowerBIDataset: @dataclass class Table: name: str - schema_name: str + full_name: str + data_source: "PowerBiAPI.DataSource" # We are supporting single data_source for the table id: str name: str webUrl: Optional[str] workspace_id: str - datasource: Any # Table in datasets - tables: List[Any] + tables: List["Table"] def get_urn_part(self): return f"datasets.{self.id}" @@ -222,7 +230,7 @@ def __members(self): def __eq__(self, instance): return ( - isinstance(instance, PowerBiAPI.Dataset) + isinstance(instance, PowerBiAPI.PowerBIDataset) and self.__members() == instance.__members() ) @@ -312,7 +320,6 @@ def __hash__(self): def __init__(self, config: PowerBiAPIConfig) -> None: self.__config: PowerBiAPIConfig = config self.__access_token: str = "" - # Power-Bi Auth (Service Principal Auth) self.__msal_client = msal.ConfidentialClientApplication( self.__config.client_id, @@ -542,7 +549,7 @@ def get_dataset(self, workspace_id: str, dataset_id: str) -> Any: response_dict = response.json() LOGGER.debug("datasets = {}".format(response_dict)) # PowerBi Always return the webURL, in-case if it is None then setting complete webURL to None instead of None/details - return PowerBiAPI.Dataset( + return PowerBiAPI.PowerBIDataset( id=response_dict.get("id"), name=response_dict.get("name"), webUrl="{}/details".format(response_dict.get("webUrl")) @@ -550,10 +557,11 @@ def get_dataset(self, workspace_id: str, dataset_id: str) -> Any: else None, workspace_id=workspace_id, tables=[], - datasource=None, ) - def get_data_source(self, dataset: Dataset) -> Any: + def get_data_sources( + self, dataset: PowerBIDataset + ) -> Dict[str, "PowerBiAPI.DataSource"]: """ Fetch the data source from PowerBi for the given dataset """ @@ -594,43 +602,21 @@ def get_data_source(self, dataset: Dataset) -> Any: return None - if len(value) > 1: - # We are currently supporting data-set having single relational database - LOGGER.warning( - "More than one data-source found for {}({})".format( - dataset.name, dataset.id 
-                )
-            )
-            LOGGER.debug(value)
-            return None
-
-        # Consider only zero index datasource
-        datasource_dict = value[0]
+        data_sources: Dict[str, "PowerBiAPI.DataSource"] = {}
         LOGGER.debug("data-sources = {}".format(value))
-        # Create datasource instance with basic detail available
-        datasource = PowerBiAPI.DataSource(
-            id=datasource_dict.get(
-                "datasourceId"
-            ),  # datasourceId is not available in all cases
-            type=datasource_dict["datasourceType"],
-            server=None,
-            database=None,
-            metadata=None,
-        )
-
-        # Check if datasource is relational as per our relation mapping
-        if self.__config.dataset_type_mapping.get(datasource.type) is not None:
-            # Now set the database detail as it is relational data source
-            datasource.metadata = PowerBiAPI.DataSource.MetaData(is_relational=True)
-            datasource.database = datasource_dict["connectionDetails"]["database"]
-            datasource.server = datasource_dict["connectionDetails"]["server"]
-        else:
-            datasource.metadata = PowerBiAPI.DataSource.MetaData(is_relational=False)
-            LOGGER.warning(
-                "Non relational data-source found = {}".format(datasource_dict)
+        for datasource_dict in value:
+            # Create datasource instance with basic detail available
+            datasource = PowerBiAPI.DataSource(
+                id=datasource_dict.get(
+                    "datasourceId"
+                ),  # datasourceId is not available in all cases
+                type=datasource_dict["datasourceType"],
+                raw_connection_detail=datasource_dict["connectionDetails"],
             )
-        return datasource
+            data_sources[datasource.id] = datasource
+
+        return data_sources
 
     def get_tiles(self, workspace: Workspace, dashboard: Dashboard) -> List[Tile]:
 
@@ -712,10 +698,46 @@ def new_dataset_or_report(tile_instance: Any) -> dict:
 
         return tiles
 
+    def process_extension_table(
+        self, data_source: "PowerBiAPI.DataSource", raw_table: dict
+    ) -> (str, str, str):
+        # All four conditions below must be met to process the Extension data-source type
+        if data_source.type != "Extension":
+            LOGGER.debug(f"data_source ({data_source.id}) type is not Extension")
+            return None, None
+        if data_source.raw_connection_detail.get("connectionDetails") is None:
+            LOGGER.debug(
+                f"data_source ({data_source.id}) type is missing connectionDetails"
+            )
+            return None, None
+        if (
+            data_source.raw_connection_detail["connectionDetails"].get(
+                "extensionDataSourceKind"
+            )
+            is None
+        ):
+            LOGGER.debug(
+                f"data_source ({data_source.id}) type is missing extensionDataSourceKind"
+            )
+            return None, None
+
+        if (
+            data_source.raw_connection_detail["connectionDetails"][
+                "extensionDataSourceKind"
+            ]
+            not in self.__config.dataset_type_mapping
+        ):
+            LOGGER.debug(f"expected platforms are {self.__config.dataset_type_mapping}")
+            return None, None
+        # The placeholder values "foo_db" and "fake_schema" below still need to be derived from the M-Query expression
+        return raw_table["name"], "foo_db.fake_schema.{}".format(raw_table["name"])
+
     # flake8: noqa: C901
-    def get_workspace(self, workspace_id: str) -> Workspace:
+    def get_workspace(
+        self, workspace_id: str, reporter: PowerBiDashboardSourceReport
+    ) -> Workspace:
         """
-        Return Workspace for the given workspace identifier i.e workspace_id
+        Return Workspace for the given workspace identifier i.e. 
workspace_id """ scan_create_endpoint = PowerBiAPI.API_ENDPOINTS[Constant.SCAN_CREATE] scan_create_endpoint = scan_create_endpoint.format( @@ -839,40 +861,85 @@ def json_to_dataset_map(scan_result: dict) -> dict: return dataset_map for dataset_dict in datasets: - dataset_instance: PowerBiAPI.Dataset = self.get_dataset( + dataset_instance: PowerBiAPI.PowerBIDataset = self.get_dataset( workspace_id=scan_result["id"], dataset_id=dataset_dict["id"], ) - dataset_map[dataset_instance.id] = dataset_instance - # set dataset's DataSource - dataset_instance.datasource = self.get_data_source(dataset_instance) - # Set table only if the datasource is relational and dataset is not created from custom SQL i.e Value.NativeQuery( - # There are dataset which doesn't have DataSource - if ( - dataset_instance.datasource - and dataset_instance.datasource.metadata.is_relational is True - ): - LOGGER.info( - f"Processing tables attribute for dataset {dataset_instance.name}({dataset_instance.id})" + # Map of data-source attached to this dataset + data_source_map: Dict[ + str, PowerBiAPI.DataSource + ] = self.get_data_sources(dataset_instance) + for table in dataset_dict["tables"]: + warning_key_prefix: str = "{}_{}".format( + dataset_dict.get("id") if dataset_dict.get("name") is None else dataset_dict.get("name"), table["name"] ) - for table in dataset_dict["tables"]: - if "Value.NativeQuery(" in table["source"][0]["expression"]: - LOGGER.warning( - f'Table {table["name"]} is created from Custom SQL. Ignoring in processing' - ) + if table.get("source") is None: + reporter.report_warning( + f"{warning_key_prefix}-source", + "table without source is not supported", + ) + continue - continue + if "Value.NativeQuery(" in table["source"][0]["expression"]: + reporter.report_warning( + f"{warning_key_prefix}-native-query", + "NativeQuery is not supported", + ) + continue + if table.get("datasourceUsages") is None: + reporter.report_warning( + f"{warning_key_prefix}-no-source", + "table does not have any source", + ) + continue + + if len(table["datasourceUsages"]) > 1: + reporter.report_warning( + f"{warning_key_prefix}-many-source", + "Multiple data-sources for single table is not supported", + ) + continue + + data_source: PowerBiAPI.DataSource = data_source_map[ + table["datasourceUsages"][0]["datasourceInstanceId"] + ] + table_name: str = None + table_full_name: str = None + if data_source.type == "Extension": + table_name, table_full_name = self.process_extension_table( + data_source, table + ) + elif ( + self.__config.dataset_type_mapping.get(data_source.type) + is not None + ): # PowerBi table name contains schema name and table name. 
Format is <schema_name> <table_name>
-                    schema_and_name = table["name"].split(" ")
-                    dataset_instance.tables.append(
-                        PowerBiAPI.Dataset.Table(
-                            schema_name=schema_and_name[0],
-                            name=schema_and_name[1],
-                        )
+                    table_name = table["name"].split(" ")[1]
+                    table_schema_name: str = table["name"].split(" ")[0]
+                    database_name: str = data_source.raw_connection_detail[
+                        "database"
+                    ]
+                    table_full_name = (
+                        f"{database_name}.{table_schema_name}.{table_name}"
+                    )
+
+                if None in (table_name, table_full_name):
+                    reporter.report_warning(
+                        f"{warning_key_prefix}-extension",
+                        f"The table source ({data_source.id}) does not belong to the supported platforms: {self.__config.dataset_type_mapping}",
                     )
+                    continue
+
+                dataset_instance.tables.append(
+                    PowerBiAPI.PowerBIDataset.Table(
+                        full_name=table_full_name,
+                        name=table_name,
+                        data_source=data_source,
+                    )
+                )
 
         return dataset_map
 
@@ -899,8 +966,8 @@ def init_dashboard_tiles(workspace: PowerBiAPI.Workspace) -> None:
 
         # Scan is complete lets take the result
         scan_result = get_scan_result(scan_id=scan_id)
-        LOGGER.debug("scan result = {}".format(scan_result))
         import json
+
         print(json.dumps(scan_result, indent=1))
         workspace = PowerBiAPI.Workspace(
             id=scan_result["id"],
@@ -969,7 +1036,7 @@ def __to_work_unit(
         )
 
     def __to_datahub_dataset(
-        self, dataset: Optional[PowerBiAPI.Dataset]
+        self, dataset: Optional[PowerBiAPI.PowerBIDataset]
     ) -> List[MetadataChangeProposalWrapper]:
         """
         Map PowerBi dataset to datahub dataset. Here we are mapping each table of PowerBi Dataset to Datahub dataset.
@@ -980,26 +1047,15 @@ def __to_datahub_dataset(
         if dataset is None:
             return dataset_mcps
 
-        # We are only supporting relation PowerBi DataSources
-        if (
-            dataset.datasource is None
-            or dataset.datasource.metadata.is_relational is False
-        ):
-            LOGGER.warning(
-                f"Dataset {dataset.name}({dataset.id}) is not created from relational datasource"
-            )
-
-            return dataset_mcps
-
         LOGGER.info(
            f"Converting dataset={dataset.name}(id={dataset.id}) to datahub dataset"
        )

         for table in dataset.tables:
-            # Create an URN for dataset
+            # Create a URN for dataset
             ds_urn = builder.make_dataset_urn(
-                platform=self.__config.dataset_type_mapping[dataset.datasource.type],
-                name=f"{dataset.datasource.database}.{table.schema_name}.{table.name}",
+                platform=self.__config.dataset_type_mapping[table.data_source.type],
+                name=f"{table.full_name}",
                 env=self.__config.env,
             )
 
@@ -1323,26 +1379,6 @@ def to_datahub_work_units(
 
         return deduplicate_list([wu for wu in work_units if wu is not None])
 
-@dataclass
-class PowerBiDashboardSourceReport(SourceReport):
-    dashboards_scanned: int = 0
-    charts_scanned: int = 0
-    filtered_dashboards: List[str] = dataclass_field(default_factory=list)
-    filtered_charts: List[str] = dataclass_field(default_factory=list)
-
-    def report_dashboards_scanned(self, count: int = 1) -> None:
-        self.dashboards_scanned += count
-
-    def report_charts_scanned(self, count: int = 1) -> None:
-        self.charts_scanned += count
-
-    def report_dashboards_dropped(self, model: str) -> None:
-        self.filtered_dashboards.append(model)
-
-    def report_charts_dropped(self, view: str) -> None:
-        self.filtered_charts.append(view)
-
-
 @platform_name("PowerBI")
 @config_class(PowerBiDashboardSourceConfig)
 @support_status(SupportStatus.CERTIFIED)
@@ -1381,7 +1417,9 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
         LOGGER.info("PowerBi plugin execution is started")
 
         # Fetch PowerBi workspace for given workspace identifier
-        workspace = self.powerbi_client.get_workspace(self.source_config.workspace_id)
+        workspace = 
self.powerbi_client.get_workspace( + self.source_config.workspace_id, self.reporter + ) for dashboard in workspace.dashboards: diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index d3ed155ffc816..7703b93240fc9 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -1,8 +1,8 @@ from typing import List -from datahub.ingestion.source.powerbi import m_parser +from lark import Tree -from lark import Visitor, Tree +from datahub.ingestion.source.powerbi import m_parser def get_output_dataset(root: Tree): @@ -15,44 +15,46 @@ def get_token_list_for_any(tree: Tree, rules: List[str]): return [] for tree in root.find_data("in_expression"): - for child1 in get_token_list_for_any(tree, ["letter_character", "quoted_identifier"]): + for child1 in get_token_list_for_any( + tree, ["letter_character", "quoted_identifier"] + ): return child1.children[0].value def test_parse_m_query1(): - expression: str = "let\n Source = Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table" + expression: str = 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table' parse_tree: Tree = m_parser.parse_expression(expression) assert get_output_dataset(parse_tree) == "TESTTABLE_Table" def test_parse_m_query2(): - expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"ADDed Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"" + expression: str = 'let\n Source = 
Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "SELECT#(lf)concat((UPPER(REPLACE(SELLER,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", null, [EnableFolding=true]),\n #"ADDed Conditional Column" = Table.AddColumn(Source, "SME Units ENT", each if [DEAL_TYPE] = "SME Unit" then [UNIT] else 0),\n #"Added Conditional Column1" = Table.AddColumn(#"Added Conditional Column", "Banklink Units", each if [DEAL_TYPE] = "Banklink" then [UNIT] else 0),\n #"Removed Columns" = Table.RemoveColumns(#"Added Conditional Column1",{"Banklink Units"}),\n #"Added Custom" = Table.AddColumn(#"Removed Columns", "Banklink Units", each if [DEAL_TYPE] = "Banklink" and [SALES_TYPE] = "3 - Upsell"\nthen [UNIT]\n\nelse if [SALES_TYPE] = "Adjusted BL Migration"\nthen [UNIT]\n\nelse 0),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "SME Units in $ (*$361)", each if [DEAL_TYPE] = "SME Unit" \nand [SALES_TYPE] <> "4 - Renewal"\n then [UNIT] * 361\nelse 0),\n #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Banklink in $ (*$148)", each [Banklink Units] * 148)\nin\n #"Added Custom2"' parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == "\"Added Custom2\"" + assert get_output_dataset(parse_tree) == '"Added Custom2"' def test_parse_m_query3(): - expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"Has PS Software Quota?\", each if [TIER] = \"Expansion (Medium)\" then \"Yes\" else if [TIER] = \"Acquisition\" then \"Yes\" else \"No\")\nin\n #\"Added Conditional Column\"" + expression: str = 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "select #(lf)UPPER(REPLACE(AGENT_NAME,\'-\',\'\')) AS Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = \'Accounting\'#(lf)and TARGET_TEAM = \'Enterprise\'", null, [EnableFolding=true]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Has PS Software Quota?", 
each if [TIER] = "Expansion (Medium)" then "Yes" else if [TIER] = "Acquisition" then "Yes" else "No")\nin\n #"Added Conditional Column"' parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == "\"Added Conditional Column\"" + assert get_output_dataset(parse_tree) == '"Added Conditional Column"' def test_parse_m_query4(): - expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *#(lf),concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,'-',''))), MONTHID) as AGENT_KEY#(lf),concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY#(lf)#(lf)from V_OIP_ENT_2022\"]),\n #\"Added Custom\" = Table.AddColumn(Source, \"OIP in $(*$350)\", each [SALES_INVOICE_AMOUNT] * 350),\n #\"Changed Type\" = Table.TransformColumnTypes(#\"Added Custom\",{{\"OIP in $(*$350)\", type number}})\nin\n #\"Changed Type\"" + expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *#(lf),concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), MONTHID) as AGENT_KEY#(lf),concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY#(lf)#(lf)from V_OIP_ENT_2022"]),\n #"Added Custom" = Table.AddColumn(Source, "OIP in $(*$350)", each [SALES_INVOICE_AMOUNT] * 350),\n #"Changed Type" = Table.TransformColumnTypes(#"Added Custom",{{"OIP in $(*$350)", type number}})\nin\n #"Changed Type"' parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == "\"Changed Type\"" + assert get_output_dataset(parse_tree) == '"Changed Type"' def test_parse_m_query5(): - expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"Select *,#(lf)#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,'-',''))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS CD_AGENT_KEY#(lf)#(lf)from V_INVOICE_BOOKING_2022\"]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"CLIENT_ID\", Int64.Type}}),\n #\"Added Conditional Column\" = Table.AddColumn(#\"Changed Type\", \"PS Software (One-Off)\", each if Text.Contains([REVENUE_TYPE], \"Software\") then [Inv_Amt] else if Text.Contains([REVENUE_TYPE], \"Tax Seminar\") then [Inv_Amt] else 0),\n #\"Filtered Rows\" = Table.SelectRows(#\"Added Conditional Column\", each true),\n #\"Duplicated Column\" = Table.DuplicateColumn(#\"Filtered Rows\", \"CLIENT_ID\", \"CLIENT_ID - Copy\"),\n #\"Changed Type1\" = Table.TransformColumnTypes(#\"Duplicated Column\",{{\"CLIENT_ID - Copy\", type text}}),\n #\"Renamed Columns\" = Table.RenameColumns(#\"Changed Type1\",{{\"CLIENT_ID - Copy\", \"CLIENT_ID for Filter\"}})\nin\n #\"Renamed Columns\"" + expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select *,#(lf)#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS CD_AGENT_KEY#(lf)#(lf)from V_INVOICE_BOOKING_2022"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"CLIENT_ID", Int64.Type}}),\n #"Added Conditional Column" = Table.AddColumn(#"Changed Type", "PS Software (One-Off)", each if Text.Contains([REVENUE_TYPE], "Software") then [Inv_Amt] else if Text.Contains([REVENUE_TYPE], "Tax Seminar") then [Inv_Amt] else 0),\n 
#"Filtered Rows" = Table.SelectRows(#"Added Conditional Column", each true),\n #"Duplicated Column" = Table.DuplicateColumn(#"Filtered Rows", "CLIENT_ID", "CLIENT_ID - Copy"),\n #"Changed Type1" = Table.TransformColumnTypes(#"Duplicated Column",{{"CLIENT_ID - Copy", type text}}),\n #"Renamed Columns" = Table.RenameColumns(#"Changed Type1",{{"CLIENT_ID - Copy", "CLIENT_ID for Filter"}})\nin\n #"Renamed Columns"' parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == "\"Renamed Columns\"" + assert get_output_dataset(parse_tree) == '"Renamed Columns"' def test_parse_m_query6(): - expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"SELECT *,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), #(lf)LEFT(CAST(MONTH_DATE AS DATE),4)+LEFT(RIGHT(CAST(MONTH_DATE AS DATE),5),2)) AS AGENT_KEY#(lf)#(lf)FROM dbo.V_ARR_ADDS\"]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"MONTH_DATE\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([MONTH_DATE]))\nin\n #\"Added Custom\"" + expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="SELECT *,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), #(lf)LEFT(CAST(MONTH_DATE AS DATE),4)+LEFT(RIGHT(CAST(MONTH_DATE AS DATE),5),2)) AS AGENT_KEY#(lf)#(lf)FROM dbo.V_ARR_ADDS"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_DATE", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([MONTH_DATE]))\nin\n #"Added Custom"' parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == "\"Added Custom\"" + assert get_output_dataset(parse_tree) == '"Added Custom"' def test_parse_m_query7(): @@ -62,21 +64,21 @@ def test_parse_m_query7(): def test_parse_m_query8(): - expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"mth_date\", type date}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Month\", each Date.Month([mth_date])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"" + expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"mth_date", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([mth_date])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "TPV Opening", each if [Month] = 1 
then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"' parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == "\"Added Custom1\"" + assert get_output_dataset(parse_tree) == '"Added Custom1"' def test_parse_m_query9(): - expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"select#(lf)CLIENT_ID,#(lf)PARTNER_ACCOUNT_NAME,#(lf)CM_CLOSING_MNTH_COUNTRY,#(lf)MONTH_WID,#(lf)PS_DELETES,#(lf)CLIENT_MANAGER_CLOSING_MONTH,#(lf)SME_DELETES,#(lf)TPV_AMV_OPENING,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,'-',''))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_TPV_LEADERBOARD\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Changed Type\" = Table.TransformColumnTypes(Source,{{\"MONTH_WID\", type text}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"MONTH_DATE\", each Date.FromText(\nText.Range([MONTH_WID], 0,4) & \"-\" &\nText.Range([MONTH_WID], 4,2)\n)),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom\", \"Month\", each Date.Month([MONTH_DATE])),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom2\", \"TPV Opening\", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #\"Added Custom1\"" + expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select#(lf)CLIENT_ID,#(lf)PARTNER_ACCOUNT_NAME,#(lf)CM_CLOSING_MNTH_COUNTRY,#(lf)MONTH_WID,#(lf)PS_DELETES,#(lf)CLIENT_MANAGER_CLOSING_MONTH,#(lf)SME_DELETES,#(lf)TPV_AMV_OPENING,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_TPV_LEADERBOARD", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_WID", type text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "MONTH_DATE", each Date.FromText(\nText.Range([MONTH_WID], 0,4) & "-" &\nText.Range([MONTH_WID], 4,2)\n)),\n #"Added Custom2" = Table.AddColumn(#"Added Custom", "Month", each Date.Month([MONTH_DATE])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom2", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"' parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == "\"Added Custom1\"" + assert get_output_dataset(parse_tree) == '"Added Custom1"' def test_parse_m_query10(): - expression: str = "let\n Source = 
Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"OPERATIONS_ANALYTICS_WAREHOUSE_PROD\",[Role=\"OPERATIONS_ANALYTICS_MEMBER_AD\"]),\n OPERATIONS_ANALYTICS_Database = Source{[Name=\"OPERATIONS_ANALYTICS\",Kind=\"Database\"]}[Data],\n TEST_Schema = OPERATIONS_ANALYTICS_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n LZ_MIGRATION_DOWNLOAD_View = TEST_Schema{[Name=\"LZ_MIGRATION_DOWNLOAD\",Kind=\"View\"]}[Data],\n #\"Changed Type\" = Table.TransformColumnTypes(LZ_MIGRATION_DOWNLOAD_View,{{\"MIGRATION_MONTH_ID\", type text}}),\n #\"Added Custom\" = Table.AddColumn(#\"Changed Type\", \"Migration Month\", each Date.FromText(\nText.Range([MIGRATION_MONTH_ID], 0,4) & \"-\" & \nText.Range([MIGRATION_MONTH_ID], 4,2) \n)),\n #\"Changed Type1\" = Table.TransformColumnTypes(#\"Added Custom\",{{\"Migration Month\", type date}})\nin\n #\"Changed Type1\"" + expression: str = 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","OPERATIONS_ANALYTICS_WAREHOUSE_PROD",[Role="OPERATIONS_ANALYTICS_MEMBER_AD"]),\n OPERATIONS_ANALYTICS_Database = Source{[Name="OPERATIONS_ANALYTICS",Kind="Database"]}[Data],\n TEST_Schema = OPERATIONS_ANALYTICS_Database{[Name="TEST",Kind="Schema"]}[Data],\n LZ_MIGRATION_DOWNLOAD_View = TEST_Schema{[Name="LZ_MIGRATION_DOWNLOAD",Kind="View"]}[Data],\n #"Changed Type" = Table.TransformColumnTypes(LZ_MIGRATION_DOWNLOAD_View,{{"MIGRATION_MONTH_ID", type text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Migration Month", each Date.FromText(\nText.Range([MIGRATION_MONTH_ID], 0,4) & "-" & \nText.Range([MIGRATION_MONTH_ID], 4,2) \n)),\n #"Changed Type1" = Table.TransformColumnTypes(#"Added Custom",{{"Migration Month", type date}})\nin\n #"Changed Type1"' parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == "\"Changed Type1\"" + assert get_output_dataset(parse_tree) == '"Changed Type1"' def test_parse_m_query11(): @@ -86,6 +88,6 @@ def test_parse_m_query11(): def test_parse_m_query12(): - expression: str = "let\n Source = Sql.Database(\"AUPRDWHDB\", \"COMMOPSDB\", [Query=\"Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,'-',''))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = 'Software' and (NOT(PRODUCT in ('ADV', 'Adv') and left(ACCOUNT_ID,2)='10') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = 'Manual Adjustment') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN ('Recurring','0') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE\", CommandTimeout=#duration(0, 1, 30, 0)]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"Services\", each if [CLASS] = \"Services\" then [INVOICE_AMOUNT] else 0),\n #\"Added Custom\" = Table.AddColumn(#\"Added Conditional Column\", \"Advanced New Sites\", each if [PRODUCT] = \"ADV\"\nor [PRODUCT] = \"Adv\"\nthen [NEW_SITE]\nelse 0)\nin\n #\"Added Custom\"" + expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,\'-\',\'\'))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = \'Software\' and (NOT(PRODUCT in (\'ADV\', \'Adv\') and left(ACCOUNT_ID,2)=\'10\') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = \'Manual Adjustment\') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN (\'Recurring\',\'0\') THEN 
INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Services", each if [CLASS] = "Services" then [INVOICE_AMOUNT] else 0),\n #"Added Custom" = Table.AddColumn(#"Added Conditional Column", "Advanced New Sites", each if [PRODUCT] = "ADV"\nor [PRODUCT] = "Adv"\nthen [NEW_SITE]\nelse 0)\nin\n #"Added Custom"'
     parse_tree: Tree = m_parser.parse_expression(expression)
     assert get_output_dataset(parse_tree) == '"Added Custom"'
diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
index bbd60f856bd96..46ec1b00081c3 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
@@ -125,6 +125,11 @@ def register_mock_api(request_mock):
                             "expression": "dummy",
                         }
                     ],
+                    "datasourceUsages": [
+                        {
+                            "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3",
+                        }
+                    ]
                 }
             ],
         }

From b4dd785bdc96c18346458f7a603f8049e6ba8e91 Mon Sep 17 00:00:00 2001
From: MohdSiddique Bagwan
Date: Thu, 24 Nov 2022 21:01:15 +0530
Subject: [PATCH 08/53] Current behaviour

---
 .../ingestion/source/powerbi/m_parser.py   |  17 ++
 .../ingestion/source/powerbi/powerbi.py    |   1 +
 .../integration/powerbi/test_m_parser.py   | 171 +++++++++---------
 3 files changed, 101 insertions(+), 88 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py
index 2a9f96742f59b..3d9a5723ad017 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py
@@ -1,11 +1,28 @@
 import importlib.resources as pkg_resource
 import logging
+from typing import List
 
 from lark import Lark, Tree
 
 logger = logging.getLogger(__name__)
 
 
+def get_output_dataset(root: Tree):
+    def get_token_list_for_any(tree: Tree, rules: List[str]):
+        for rule in rules:
+            token_list = [x for x in tree.find_data(rule)]
+            if len(token_list) > 0:
+                return token_list
+
+        return []
+
+    for tree in root.find_data("in_expression"):
+        for child1 in get_token_list_for_any(
+            tree, ["letter_character", "quoted_identifier"]
+        ):
+            return child1.children[0].value
+
+
 def parse_expression(expression: str) -> Tree:
     grammar: str = pkg_resource.read_text(
         "datahub.ingestion.source.powerbi", "powerbi-lexical-grammar.rule"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
index b2a2a7fe62ad4..ac9782f1dcf7a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
@@ -1095,6 +1095,7 @@ def init_dashboard_tiles(workspace: PowerBiAPI.Workspace) -> None:
             )
         # Get workspace dashboards
         workspace.dashboards = self.get_dashboards(workspace)
+        workspace.datasets = json_to_dataset_map(scan_result)
 
         init_dashboard_tiles(workspace)
 
diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
index 7703b93240fc9..15d9261898212 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
@@ -1,93 +1,88 @@
 from typing import List
- +import lark from lark import Tree from datahub.ingestion.source.powerbi import m_parser - -def get_output_dataset(root: Tree): - def get_token_list_for_any(tree: Tree, rules: List[str]): - for rule in rules: - token_list = [x for x in tree.find_data(rule)] - if len(token_list) > 0: - return token_list - - return [] - - for tree in root.find_data("in_expression"): - for child1 in get_token_list_for_any( - tree, ["letter_character", "quoted_identifier"] - ): - return child1.children[0].value - - -def test_parse_m_query1(): - expression: str = 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table' - parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == "TESTTABLE_Table" - - -def test_parse_m_query2(): - expression: str = 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "SELECT#(lf)concat((UPPER(REPLACE(SELLER,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", null, [EnableFolding=true]),\n #"ADDed Conditional Column" = Table.AddColumn(Source, "SME Units ENT", each if [DEAL_TYPE] = "SME Unit" then [UNIT] else 0),\n #"Added Conditional Column1" = Table.AddColumn(#"Added Conditional Column", "Banklink Units", each if [DEAL_TYPE] = "Banklink" then [UNIT] else 0),\n #"Removed Columns" = Table.RemoveColumns(#"Added Conditional Column1",{"Banklink Units"}),\n #"Added Custom" = Table.AddColumn(#"Removed Columns", "Banklink Units", each if [DEAL_TYPE] = "Banklink" and [SALES_TYPE] = "3 - Upsell"\nthen [UNIT]\n\nelse if [SALES_TYPE] = "Adjusted BL Migration"\nthen [UNIT]\n\nelse 0),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "SME Units in $ (*$361)", each if [DEAL_TYPE] = "SME Unit" \nand [SALES_TYPE] <> "4 - Renewal"\n then [UNIT] * 361\nelse 0),\n #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Banklink in $ (*$148)", each [Banklink Units] * 148)\nin\n #"Added Custom2"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == '"Added Custom2"' - - -def test_parse_m_query3(): - expression: str = 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "select #(lf)UPPER(REPLACE(AGENT_NAME,\'-\',\'\')) AS Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = \'Accounting\'#(lf)and TARGET_TEAM = \'Enterprise\'", null, [EnableFolding=true]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Has PS Software Quota?", each 
if [TIER] = "Expansion (Medium)" then "Yes" else if [TIER] = "Acquisition" then "Yes" else "No")\nin\n #"Added Conditional Column"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == '"Added Conditional Column"' - - -def test_parse_m_query4(): - expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *#(lf),concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), MONTHID) as AGENT_KEY#(lf),concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY#(lf)#(lf)from V_OIP_ENT_2022"]),\n #"Added Custom" = Table.AddColumn(Source, "OIP in $(*$350)", each [SALES_INVOICE_AMOUNT] * 350),\n #"Changed Type" = Table.TransformColumnTypes(#"Added Custom",{{"OIP in $(*$350)", type number}})\nin\n #"Changed Type"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == '"Changed Type"' - - -def test_parse_m_query5(): - expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select *,#(lf)#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS CD_AGENT_KEY#(lf)#(lf)from V_INVOICE_BOOKING_2022"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"CLIENT_ID", Int64.Type}}),\n #"Added Conditional Column" = Table.AddColumn(#"Changed Type", "PS Software (One-Off)", each if Text.Contains([REVENUE_TYPE], "Software") then [Inv_Amt] else if Text.Contains([REVENUE_TYPE], "Tax Seminar") then [Inv_Amt] else 0),\n #"Filtered Rows" = Table.SelectRows(#"Added Conditional Column", each true),\n #"Duplicated Column" = Table.DuplicateColumn(#"Filtered Rows", "CLIENT_ID", "CLIENT_ID - Copy"),\n #"Changed Type1" = Table.TransformColumnTypes(#"Duplicated Column",{{"CLIENT_ID - Copy", type text}}),\n #"Renamed Columns" = Table.RenameColumns(#"Changed Type1",{{"CLIENT_ID - Copy", "CLIENT_ID for Filter"}})\nin\n #"Renamed Columns"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == '"Renamed Columns"' - - -def test_parse_m_query6(): - expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="SELECT *,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), #(lf)LEFT(CAST(MONTH_DATE AS DATE),4)+LEFT(RIGHT(CAST(MONTH_DATE AS DATE),5),2)) AS AGENT_KEY#(lf)#(lf)FROM dbo.V_ARR_ADDS"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_DATE", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([MONTH_DATE]))\nin\n #"Added Custom"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == '"Added Custom"' - - -def test_parse_m_query7(): - expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS CLIENT_DIRECTOR,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from 
OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'#(lf)AND TIER = 'Client Director'\", null, [EnableFolding=true])\nin\n Source" - parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == "Source" - - -def test_parse_m_query8(): - expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"mth_date", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([mth_date])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == '"Added Custom1"' - - -def test_parse_m_query9(): - expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select#(lf)CLIENT_ID,#(lf)PARTNER_ACCOUNT_NAME,#(lf)CM_CLOSING_MNTH_COUNTRY,#(lf)MONTH_WID,#(lf)PS_DELETES,#(lf)CLIENT_MANAGER_CLOSING_MONTH,#(lf)SME_DELETES,#(lf)TPV_AMV_OPENING,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_TPV_LEADERBOARD", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_WID", type text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "MONTH_DATE", each Date.FromText(\nText.Range([MONTH_WID], 0,4) & "-" &\nText.Range([MONTH_WID], 4,2)\n)),\n #"Added Custom2" = Table.AddColumn(#"Added Custom", "Month", each Date.Month([MONTH_DATE])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom2", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == '"Added Custom1"' - - -def test_parse_m_query10(): - expression: str = 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","OPERATIONS_ANALYTICS_WAREHOUSE_PROD",[Role="OPERATIONS_ANALYTICS_MEMBER_AD"]),\n OPERATIONS_ANALYTICS_Database = Source{[Name="OPERATIONS_ANALYTICS",Kind="Database"]}[Data],\n TEST_Schema = OPERATIONS_ANALYTICS_Database{[Name="TEST",Kind="Schema"]}[Data],\n LZ_MIGRATION_DOWNLOAD_View = TEST_Schema{[Name="LZ_MIGRATION_DOWNLOAD",Kind="View"]}[Data],\n #"Changed Type" = Table.TransformColumnTypes(LZ_MIGRATION_DOWNLOAD_View,{{"MIGRATION_MONTH_ID", type 
text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Migration Month", each Date.FromText(\nText.Range([MIGRATION_MONTH_ID], 0,4) & "-" & \nText.Range([MIGRATION_MONTH_ID], 4,2) \n)),\n #"Changed Type1" = Table.TransformColumnTypes(#"Added Custom",{{"Migration Month", type date}})\nin\n #"Changed Type1"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == '"Changed Type1"' - - -def test_parse_m_query11(): - expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select *,#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Industries'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true])\nin\n Source" - parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == "Source" - - -def test_parse_m_query12(): - expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,\'-\',\'\'))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = \'Software\' and (NOT(PRODUCT in (\'ADV\', \'Adv\') and left(ACCOUNT_ID,2)=\'10\') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = \'Manual Adjustment\') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN (\'Recurring\',\'0\') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Services", each if [CLASS] = "Services" then [INVOICE_AMOUNT] else 0),\n #"Added Custom" = Table.AddColumn(#"Added Conditional Column", "Advanced New Sites", each if [PRODUCT] = "ADV"\nor [PRODUCT] = "Adv"\nthen [NEW_SITE]\nelse 0)\nin\n #"Added Custom"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert get_output_dataset(parse_tree) == '"Added Custom"' +# def test_parse_m_query1(): +# expression: str = 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table' +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert get_output_dataset(parse_tree) == "TESTTABLE_Table" +# +# +# def test_parse_m_query2(): +# expression: str = 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "SELECT#(lf)concat((UPPER(REPLACE(SELLER,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", null, [EnableFolding=true]),\n #"ADDed Conditional Column" = Table.AddColumn(Source, "SME Units ENT", each if [DEAL_TYPE] = "SME Unit" then [UNIT] else 0),\n #"Added Conditional Column1" = 
Table.AddColumn(#"Added Conditional Column", "Banklink Units", each if [DEAL_TYPE] = "Banklink" then [UNIT] else 0),\n #"Removed Columns" = Table.RemoveColumns(#"Added Conditional Column1",{"Banklink Units"}),\n #"Added Custom" = Table.AddColumn(#"Removed Columns", "Banklink Units", each if [DEAL_TYPE] = "Banklink" and [SALES_TYPE] = "3 - Upsell"\nthen [UNIT]\n\nelse if [SALES_TYPE] = "Adjusted BL Migration"\nthen [UNIT]\n\nelse 0),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "SME Units in $ (*$361)", each if [DEAL_TYPE] = "SME Unit" \nand [SALES_TYPE] <> "4 - Renewal"\n then [UNIT] * 361\nelse 0),\n #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Banklink in $ (*$148)", each [Banklink Units] * 148)\nin\n #"Added Custom2"' +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert get_output_dataset(parse_tree) == '"Added Custom2"' +# +# +# def test_parse_m_query3(): +# expression: str = 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "select #(lf)UPPER(REPLACE(AGENT_NAME,\'-\',\'\')) AS Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = \'Accounting\'#(lf)and TARGET_TEAM = \'Enterprise\'", null, [EnableFolding=true]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Has PS Software Quota?", each if [TIER] = "Expansion (Medium)" then "Yes" else if [TIER] = "Acquisition" then "Yes" else "No")\nin\n #"Added Conditional Column"' +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert get_output_dataset(parse_tree) == '"Added Conditional Column"' +# +# +# def test_parse_m_query4(): +# expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *#(lf),concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), MONTHID) as AGENT_KEY#(lf),concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY#(lf)#(lf)from V_OIP_ENT_2022"]),\n #"Added Custom" = Table.AddColumn(Source, "OIP in $(*$350)", each [SALES_INVOICE_AMOUNT] * 350),\n #"Changed Type" = Table.TransformColumnTypes(#"Added Custom",{{"OIP in $(*$350)", type number}})\nin\n #"Changed Type"' +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert get_output_dataset(parse_tree) == '"Changed Type"' +# +# +# def test_parse_m_query5(): +# expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select *,#(lf)#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS CD_AGENT_KEY#(lf)#(lf)from V_INVOICE_BOOKING_2022"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"CLIENT_ID", Int64.Type}}),\n #"Added Conditional Column" = Table.AddColumn(#"Changed Type", "PS Software (One-Off)", each if Text.Contains([REVENUE_TYPE], "Software") then [Inv_Amt] else if Text.Contains([REVENUE_TYPE], "Tax Seminar") then [Inv_Amt] else 0),\n #"Filtered Rows" = 
Table.SelectRows(#"Added Conditional Column", each true),\n #"Duplicated Column" = Table.DuplicateColumn(#"Filtered Rows", "CLIENT_ID", "CLIENT_ID - Copy"),\n #"Changed Type1" = Table.TransformColumnTypes(#"Duplicated Column",{{"CLIENT_ID - Copy", type text}}),\n #"Renamed Columns" = Table.RenameColumns(#"Changed Type1",{{"CLIENT_ID - Copy", "CLIENT_ID for Filter"}})\nin\n #"Renamed Columns"' +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert get_output_dataset(parse_tree) == '"Renamed Columns"' +# +# +# def test_parse_m_query6(): +# expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="SELECT *,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), #(lf)LEFT(CAST(MONTH_DATE AS DATE),4)+LEFT(RIGHT(CAST(MONTH_DATE AS DATE),5),2)) AS AGENT_KEY#(lf)#(lf)FROM dbo.V_ARR_ADDS"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_DATE", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([MONTH_DATE]))\nin\n #"Added Custom"' +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert get_output_dataset(parse_tree) == '"Added Custom"' +# +# +# def test_parse_m_query7(): +# expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS CLIENT_DIRECTOR,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'#(lf)AND TIER = 'Client Director'\", null, [EnableFolding=true])\nin\n Source" +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert get_output_dataset(parse_tree) == "Source" +# +# +# def test_parse_m_query8(): +# expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"mth_date", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([mth_date])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"' +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert get_output_dataset(parse_tree) == '"Added Custom1"' +# +# +# def test_parse_m_query9(): +# expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", 
[Query="select#(lf)CLIENT_ID,#(lf)PARTNER_ACCOUNT_NAME,#(lf)CM_CLOSING_MNTH_COUNTRY,#(lf)MONTH_WID,#(lf)PS_DELETES,#(lf)CLIENT_MANAGER_CLOSING_MONTH,#(lf)SME_DELETES,#(lf)TPV_AMV_OPENING,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_TPV_LEADERBOARD", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_WID", type text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "MONTH_DATE", each Date.FromText(\nText.Range([MONTH_WID], 0,4) & "-" &\nText.Range([MONTH_WID], 4,2)\n)),\n #"Added Custom2" = Table.AddColumn(#"Added Custom", "Month", each Date.Month([MONTH_DATE])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom2", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"' +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert get_output_dataset(parse_tree) == '"Added Custom1"' +# +# +# def test_parse_m_query10(): +# expression: str = 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","OPERATIONS_ANALYTICS_WAREHOUSE_PROD",[Role="OPERATIONS_ANALYTICS_MEMBER_AD"]),\n OPERATIONS_ANALYTICS_Database = Source{[Name="OPERATIONS_ANALYTICS",Kind="Database"]}[Data],\n TEST_Schema = OPERATIONS_ANALYTICS_Database{[Name="TEST",Kind="Schema"]}[Data],\n LZ_MIGRATION_DOWNLOAD_View = TEST_Schema{[Name="LZ_MIGRATION_DOWNLOAD",Kind="View"]}[Data],\n #"Changed Type" = Table.TransformColumnTypes(LZ_MIGRATION_DOWNLOAD_View,{{"MIGRATION_MONTH_ID", type text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Migration Month", each Date.FromText(\nText.Range([MIGRATION_MONTH_ID], 0,4) & "-" & \nText.Range([MIGRATION_MONTH_ID], 4,2) \n)),\n #"Changed Type1" = Table.TransformColumnTypes(#"Added Custom",{{"Migration Month", type date}})\nin\n #"Changed Type1"' +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert get_output_dataset(parse_tree) == '"Changed Type1"' +# +# +# def test_parse_m_query11(): +# expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select *,#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Industries'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true])\nin\n Source" +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert get_output_dataset(parse_tree) == "Source" +# +# +# def test_parse_m_query12(): +# expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,\'-\',\'\'))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = \'Software\' and (NOT(PRODUCT in (\'ADV\', \'Adv\') and left(ACCOUNT_ID,2)=\'10\') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = 
\'Manual Adjustment\') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN (\'Recurring\',\'0\') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Services", each if [CLASS] = "Services" then [INVOICE_AMOUNT] else 0),\n #"Added Custom" = Table.AddColumn(#"Added Conditional Column", "Advanced New Sites", each if [PRODUCT] = "ADV"\nor [PRODUCT] = "Adv"\nthen [NEW_SITE]\nelse 0)\nin\n #"Added Custom"' +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert get_output_dataset(parse_tree) == '"Added Custom"' + +def test_find_schema_detail(): + expression: str = "let\n Source = Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]),\n GSL_TEST_DB_Database = Source{[Name=\"GSL_TEST_DB\",Kind=\"Database\"]}[Data],\n DG_RBAC_ACL_Schema = GSL_TEST_DB_Database{[Name=\"DG_RBAC_ACL\",Kind=\"Schema\"]}[Data],\n SALES_Table = DG_RBAC_ACL_Schema{[Name=\"SALES\",Kind=\"Table\"]}[Data]\nin\n SALES_Table" + full_name: str = m_parser.find_full_name_and_lineage() + +def test_x(): + expression: str = "CALCULATE(SUM('Invoiced Revenue'[Services]),Targets[SERVICE_QUOTA] > 0)" + try: + m_parser.parse_expression(expression) + assert 1 != 1 + except lark.exceptions.UnexpectedCharacters: + pass From 281bc568571038c9d352326c252812e9fde5ba1b Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Thu, 24 Nov 2022 22:39:11 +0530 Subject: [PATCH 09/53] new behaviour where data-platform is powerbi --- .../ingestion/source/powerbi/m_parser.py | 10 +- .../ingestion/source/powerbi/powerbi.py | 135 +++------------ .../golden_test_disabled_ownership.json | 10 +- .../powerbi/golden_test_ingest.json | 10 +- .../powerbi/golden_test_report.json | 20 +-- .../integration/powerbi/test_m_parser.py | 155 ++++++++---------- .../tests/integration/powerbi/test_powerbi.py | 2 +- 7 files changed, 124 insertions(+), 218 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index 3d9a5723ad017..28d940ed12e37 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -1,14 +1,14 @@ import importlib.resources as pkg_resource import logging -from typing import List +from typing import List, Optional from lark import Lark, Tree logger = logging.getLogger(__name__) -def get_output_dataset(root: Tree): - def get_token_list_for_any(tree: Tree, rules: List[str]): +def get_output_dataset(root: Tree) -> Optional[str]: + def get_token_list_for_any(tree: Tree, rules: List[str]) -> List[Tree]: for rule in rules: token_list = [x for x in tree.find_data(rule)] if len(token_list) > 0: @@ -20,7 +20,9 @@ def get_token_list_for_any(tree: Tree, rules: List[str]): for child1 in get_token_list_for_any( tree, ["letter_character", "quoted_identifier"] ): - return child1.children[0].value + return child1.children[0].value # type: ignore + + return None def parse_expression(expression: str) -> Tree: diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index ac9782f1dcf7a..ec4f25fa784ee 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -191,7 
+191,7 @@ class Workspace: name: str state: str dashboards: List[Any] - datasets: Dict[str, "PowerBiAPI.Dataset"] + datasets: Dict[str, "PowerBiAPI.PowerBIDataset"] @dataclass class DataSource: @@ -222,7 +222,7 @@ class PowerBIDataset: class Table: name: str full_name: str - data_source: "PowerBiAPI.DataSource" # We are supporting single data_source for the table + expression: Optional[str] id: str name: str @@ -286,7 +286,7 @@ class Report: webUrl: str embedUrl: str description: str - dataset: Optional["PowerBiAPI.Dataset"] + dataset: Optional["PowerBiAPI.PowerBIDataset"] pages: List["PowerBiAPI.Page"] users: List["PowerBiAPI.User"] @@ -304,7 +304,7 @@ class CreatedFrom(Enum): id: str title: str embedUrl: str - dataset: Optional["PowerBiAPI.Dataset"] + dataset: Optional["PowerBiAPI.PowerBIDataset"] report: Optional[Any] createdFrom: CreatedFrom @@ -584,7 +584,7 @@ def get_dataset(self, workspace_id: str, dataset_id: str) -> Any: def get_data_sources( self, dataset: PowerBIDataset - ) -> Dict[str, "PowerBiAPI.DataSource"]: + ) -> Optional[Dict[str, "PowerBiAPI.DataSource"]]: """ Fetch the data source from PowerBi for the given dataset """ @@ -721,40 +721,6 @@ def new_dataset_or_report(tile_instance: Any) -> dict: return tiles - def process_extension_table( - self, data_source: "PowerBiAPI.DataSource", raw_table: dict - ) -> (str, str, str): - # All below four condition should meet to process the Extension data-source type - if data_source.type != "Extension": - LOGGER.debug(f"data_source ({data_source.id}) type is not Extension") - return None, None - if data_source.raw_connection_detail.get("connectionDetails") is None: - LOGGER.debug( - f"data_source ({data_source.id}) type is missing connectionDetails" - ) - return None, None - if ( - data_source.raw_connection_detail["connectionDetails"].get( - "extensionDataSourceKind" - ) - is None - ): - LOGGER.debug( - f"data_source ({data_source.id}) type is missing extensionDataSourceKind" - ) - return None, None - - if ( - data_source.raw_connection_detail["connectionDetails"][ - "extensionDataSourceKind" - ] - not in self.__config.dataset_type_mapping - ): - LOGGER.debug(f"expected platforms are {self.__config.dataset_type_mapping}") - return None, None - # fake and foo need to be find out from M-Query - return raw_table["name"], "foo_db.fake_schema.{}".format(raw_table["name"]) - def get_pages_by_report( self, workspace_id: str, report_id: str ) -> List["PowerBiAPI.Page"]: @@ -983,78 +949,27 @@ def json_to_dataset_map(scan_result: dict) -> dict: dataset_id=dataset_dict["id"], ) dataset_map[dataset_instance.id] = dataset_instance - # Map of data-source attached to this dataset - data_source_map: Dict[ - str, PowerBiAPI.DataSource - ] = self.get_data_sources(dataset_instance) + # set dataset-name + dataset_name: str = ( + dataset_instance.name + if dataset_instance.name is not None + else dataset_instance.id + ) + for table in dataset_dict["tables"]: - warning_key_prefix: str = "{}_{}".format( - dataset_dict.get("id") if dataset_dict.get("name") is None else dataset_dict.get("name"), table["name"] + expression: str = ( + table["source"][0]["expression"] + if table.get("source") is not None and len(table["source"]) > 0 + else None ) - - if table.get("source") is None: - reporter.report_warning( - f"{warning_key_prefix}-source", - "table without source is not supported", - ) - continue - - if "Value.NativeQuery(" in table["source"][0]["expression"]: - reporter.report_warning( - f"{warning_key_prefix}-native-query", - "NativeQuery is not 
supported", - ) - continue - - if table.get("datasourceUsages") is None: - reporter.report_warning( - f"{warning_key_prefix}-no-source", - "table does not have any source", - ) - continue - - if len(table["datasourceUsages"]) > 1: - reporter.report_warning( - f"{warning_key_prefix}-many-source", - "Multiple data-sources for single table is not supported", - ) - continue - - data_source: PowerBiAPI.DataSource = data_source_map[ - table["datasourceUsages"][0]["datasourceInstanceId"] - ] - table_name: str = None - table_full_name: str = None - if data_source.type == "Extension": - table_name, table_full_name = self.process_extension_table( - data_source, table - ) - elif ( - self.__config.dataset_type_mapping.get(data_source.type) - is not None - ): - # PowerBi table name contains schema name and table name. Format is - table_name = table["name"].split(" ")[1] - table_schema_name: str = table["name"].split(" ")[0] - database_name: str = data_source.raw_connection_detail[ - "database" - ] - table_full_name = ( - f"{database_name}.{table_schema_name}.{table_name}" - ) - - if None in (table_name, table_full_name): - reporter.report_warning( - f"{warning_key_prefix}-extension", - f"The table source ({data_source.id}) is not belongs to supported platforms: {self.__config.dataset_type_mapping}", - ) - continue - dataset_instance.tables.append( PowerBiAPI.PowerBIDataset.Table( - full_name=table_full_name, - name=table_name, - data_source=data_source, + name=table["name"], + full_name="{}.{}".format( + dataset_name.replace(" ", "_"), + table["name"].replace(" ", "_"), + ), + expression=expression, ) ) @@ -1172,14 +1087,16 @@ def __to_datahub_dataset( for table in dataset.tables: # Create a URN for dataset ds_urn = builder.make_dataset_urn( - platform=self.__config.dataset_type_mapping[table.data_source.type], + platform=self.__config.platform_name, name=f"{table.full_name}", env=self.__config.env, ) LOGGER.info(f"{Constant.Dataset_URN}={ds_urn}") # Create datasetProperties mcp - ds_properties = DatasetPropertiesClass(description=table.name) + ds_properties = DatasetPropertiesClass( + name=table.name, description=table.name + ) info_mcp = self.new_mcp( entity_type=Constant.DATASET, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json index 10aa0d3295e66..2154e4d7c2b56 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json @@ -1,11 +1,11 @@ [ { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"description\": \"issue_history\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"name\": \"public issue_history\", \"description\": \"public issue_history\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -15,7 +15,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -33,7 +33,7 @@ "changeType": 
"UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)\"}]}", + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { @@ -125,4 +125,4 @@ "runId": "powerbi-test" } } -] \ No newline at end of file +] diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json index 49bdb95b08602..331e4fde518dd 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json @@ -1,11 +1,11 @@ [ { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"description\": \"issue_history\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"name\": \"public issue_history\", \"description\": \"public issue_history\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -15,7 +15,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -117,7 +117,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)\"}]}", + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": 
\"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { @@ -223,4 +223,4 @@ "runId": "powerbi-test" } } -] \ No newline at end of file +] diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json index 20b51df7734a6..cfafce5d452a5 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json @@ -1,11 +1,11 @@ [ { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"description\": \"issue_history\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"name\": \"public issue_history\", \"description\": \"public issue_history\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -15,7 +15,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -117,7 +117,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)\"}]}", + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { @@ -225,11 +225,11 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", 
"aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"description\": \"issue_history\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"name\": \"public issue_history\", \"description\": \"public issue_history\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -239,7 +239,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -341,7 +341,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"order\": \"0\"}, \"title\": \"ReportSection\", \"description\": \"Regional Sales Analysis\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)\"}]}", + "value": "{\"customProperties\": {\"order\": \"0\"}, \"title\": \"ReportSection\", \"description\": \"Regional Sales Analysis\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { @@ -369,7 +369,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"order\": \"1\"}, \"title\": \"ReportSection1\", \"description\": \"Geographic Analysis\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)\"}]}", + "value": "{\"customProperties\": {\"order\": \"1\"}, \"title\": \"ReportSection1\", \"description\": \"Geographic Analysis\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { @@ -475,4 +475,4 @@ "runId": "powerbi-test" } } -] \ No newline at end of file +] diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 15d9261898212..29497bdd122c2 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -1,88 +1,75 @@ -from typing import List -import lark from lark import Tree from datahub.ingestion.source.powerbi import m_parser -# def test_parse_m_query1(): -# expression: str = 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table' -# parse_tree: Tree = 
m_parser.parse_expression(expression) -# assert get_output_dataset(parse_tree) == "TESTTABLE_Table" -# -# -# def test_parse_m_query2(): -# expression: str = 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "SELECT#(lf)concat((UPPER(REPLACE(SELLER,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", null, [EnableFolding=true]),\n #"ADDed Conditional Column" = Table.AddColumn(Source, "SME Units ENT", each if [DEAL_TYPE] = "SME Unit" then [UNIT] else 0),\n #"Added Conditional Column1" = Table.AddColumn(#"Added Conditional Column", "Banklink Units", each if [DEAL_TYPE] = "Banklink" then [UNIT] else 0),\n #"Removed Columns" = Table.RemoveColumns(#"Added Conditional Column1",{"Banklink Units"}),\n #"Added Custom" = Table.AddColumn(#"Removed Columns", "Banklink Units", each if [DEAL_TYPE] = "Banklink" and [SALES_TYPE] = "3 - Upsell"\nthen [UNIT]\n\nelse if [SALES_TYPE] = "Adjusted BL Migration"\nthen [UNIT]\n\nelse 0),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "SME Units in $ (*$361)", each if [DEAL_TYPE] = "SME Unit" \nand [SALES_TYPE] <> "4 - Renewal"\n then [UNIT] * 361\nelse 0),\n #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Banklink in $ (*$148)", each [Banklink Units] * 148)\nin\n #"Added Custom2"' -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert get_output_dataset(parse_tree) == '"Added Custom2"' -# -# -# def test_parse_m_query3(): -# expression: str = 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "select #(lf)UPPER(REPLACE(AGENT_NAME,\'-\',\'\')) AS Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = \'Accounting\'#(lf)and TARGET_TEAM = \'Enterprise\'", null, [EnableFolding=true]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Has PS Software Quota?", each if [TIER] = "Expansion (Medium)" then "Yes" else if [TIER] = "Acquisition" then "Yes" else "No")\nin\n #"Added Conditional Column"' -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert get_output_dataset(parse_tree) == '"Added Conditional Column"' -# -# -# def test_parse_m_query4(): -# expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *#(lf),concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), MONTHID) as AGENT_KEY#(lf),concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY#(lf)#(lf)from V_OIP_ENT_2022"]),\n #"Added Custom" = Table.AddColumn(Source, "OIP in $(*$350)", each [SALES_INVOICE_AMOUNT] * 350),\n #"Changed Type" = Table.TransformColumnTypes(#"Added Custom",{{"OIP in $(*$350)", type number}})\nin\n #"Changed Type"' -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert get_output_dataset(parse_tree) == '"Changed 
Type"' -# -# -# def test_parse_m_query5(): -# expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select *,#(lf)#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS CD_AGENT_KEY#(lf)#(lf)from V_INVOICE_BOOKING_2022"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"CLIENT_ID", Int64.Type}}),\n #"Added Conditional Column" = Table.AddColumn(#"Changed Type", "PS Software (One-Off)", each if Text.Contains([REVENUE_TYPE], "Software") then [Inv_Amt] else if Text.Contains([REVENUE_TYPE], "Tax Seminar") then [Inv_Amt] else 0),\n #"Filtered Rows" = Table.SelectRows(#"Added Conditional Column", each true),\n #"Duplicated Column" = Table.DuplicateColumn(#"Filtered Rows", "CLIENT_ID", "CLIENT_ID - Copy"),\n #"Changed Type1" = Table.TransformColumnTypes(#"Duplicated Column",{{"CLIENT_ID - Copy", type text}}),\n #"Renamed Columns" = Table.RenameColumns(#"Changed Type1",{{"CLIENT_ID - Copy", "CLIENT_ID for Filter"}})\nin\n #"Renamed Columns"' -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert get_output_dataset(parse_tree) == '"Renamed Columns"' -# -# -# def test_parse_m_query6(): -# expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="SELECT *,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), #(lf)LEFT(CAST(MONTH_DATE AS DATE),4)+LEFT(RIGHT(CAST(MONTH_DATE AS DATE),5),2)) AS AGENT_KEY#(lf)#(lf)FROM dbo.V_ARR_ADDS"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_DATE", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([MONTH_DATE]))\nin\n #"Added Custom"' -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert get_output_dataset(parse_tree) == '"Added Custom"' -# -# -# def test_parse_m_query7(): -# expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS CLIENT_DIRECTOR,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'#(lf)AND TIER = 'Client Director'\", null, [EnableFolding=true])\nin\n Source" -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert get_output_dataset(parse_tree) == "Source" -# -# -# def test_parse_m_query8(): -# expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"mth_date", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each 
Date.Month([mth_date])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"' -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert get_output_dataset(parse_tree) == '"Added Custom1"' -# -# -# def test_parse_m_query9(): -# expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select#(lf)CLIENT_ID,#(lf)PARTNER_ACCOUNT_NAME,#(lf)CM_CLOSING_MNTH_COUNTRY,#(lf)MONTH_WID,#(lf)PS_DELETES,#(lf)CLIENT_MANAGER_CLOSING_MONTH,#(lf)SME_DELETES,#(lf)TPV_AMV_OPENING,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_TPV_LEADERBOARD", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_WID", type text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "MONTH_DATE", each Date.FromText(\nText.Range([MONTH_WID], 0,4) & "-" &\nText.Range([MONTH_WID], 4,2)\n)),\n #"Added Custom2" = Table.AddColumn(#"Added Custom", "Month", each Date.Month([MONTH_DATE])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom2", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"' -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert get_output_dataset(parse_tree) == '"Added Custom1"' -# -# -# def test_parse_m_query10(): -# expression: str = 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","OPERATIONS_ANALYTICS_WAREHOUSE_PROD",[Role="OPERATIONS_ANALYTICS_MEMBER_AD"]),\n OPERATIONS_ANALYTICS_Database = Source{[Name="OPERATIONS_ANALYTICS",Kind="Database"]}[Data],\n TEST_Schema = OPERATIONS_ANALYTICS_Database{[Name="TEST",Kind="Schema"]}[Data],\n LZ_MIGRATION_DOWNLOAD_View = TEST_Schema{[Name="LZ_MIGRATION_DOWNLOAD",Kind="View"]}[Data],\n #"Changed Type" = Table.TransformColumnTypes(LZ_MIGRATION_DOWNLOAD_View,{{"MIGRATION_MONTH_ID", type text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Migration Month", each Date.FromText(\nText.Range([MIGRATION_MONTH_ID], 0,4) & "-" & \nText.Range([MIGRATION_MONTH_ID], 4,2) \n)),\n #"Changed Type1" = Table.TransformColumnTypes(#"Added Custom",{{"Migration Month", type date}})\nin\n #"Changed Type1"' -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert get_output_dataset(parse_tree) == '"Changed Type1"' -# -# -# def test_parse_m_query11(): -# expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select *,#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY#(lf)#(lf)from 
OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Industries'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true])\nin\n Source" -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert get_output_dataset(parse_tree) == "Source" -# -# -# def test_parse_m_query12(): -# expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,\'-\',\'\'))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = \'Software\' and (NOT(PRODUCT in (\'ADV\', \'Adv\') and left(ACCOUNT_ID,2)=\'10\') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = \'Manual Adjustment\') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN (\'Recurring\',\'0\') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Services", each if [CLASS] = "Services" then [INVOICE_AMOUNT] else 0),\n #"Added Custom" = Table.AddColumn(#"Added Conditional Column", "Advanced New Sites", each if [PRODUCT] = "ADV"\nor [PRODUCT] = "Adv"\nthen [NEW_SITE]\nelse 0)\nin\n #"Added Custom"' -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert get_output_dataset(parse_tree) == '"Added Custom"' - -def test_find_schema_detail(): - expression: str = "let\n Source = Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]),\n GSL_TEST_DB_Database = Source{[Name=\"GSL_TEST_DB\",Kind=\"Database\"]}[Data],\n DG_RBAC_ACL_Schema = GSL_TEST_DB_Database{[Name=\"DG_RBAC_ACL\",Kind=\"Schema\"]}[Data],\n SALES_Table = DG_RBAC_ACL_Schema{[Name=\"SALES\",Kind=\"Table\"]}[Data]\nin\n SALES_Table" - full_name: str = m_parser.find_full_name_and_lineage() - -def test_x(): - expression: str = "CALCULATE(SUM('Invoiced Revenue'[Services]),Targets[SERVICE_QUOTA] > 0)" - try: - m_parser.parse_expression(expression) - assert 1 != 1 - except lark.exceptions.UnexpectedCharacters: - pass + +def test_parse_m_query1(): + expression: str = 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table' + parse_tree: Tree = m_parser.parse_expression(expression) + assert m_parser.get_output_dataset(parse_tree) == "TESTTABLE_Table" + + +def test_parse_m_query2(): + expression: str = 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "SELECT#(lf)concat((UPPER(REPLACE(SELLER,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", null, [EnableFolding=true]),\n #"ADDed Conditional Column" = Table.AddColumn(Source, "SME Units ENT", each if [DEAL_TYPE] = "SME Unit" then [UNIT] else 0),\n #"Added Conditional Column1" = Table.AddColumn(#"Added Conditional Column", "Banklink Units", each if [DEAL_TYPE] = "Banklink" then [UNIT] else 0),\n #"Removed 
Columns" = Table.RemoveColumns(#"Added Conditional Column1",{"Banklink Units"}),\n #"Added Custom" = Table.AddColumn(#"Removed Columns", "Banklink Units", each if [DEAL_TYPE] = "Banklink" and [SALES_TYPE] = "3 - Upsell"\nthen [UNIT]\n\nelse if [SALES_TYPE] = "Adjusted BL Migration"\nthen [UNIT]\n\nelse 0),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "SME Units in $ (*$361)", each if [DEAL_TYPE] = "SME Unit" \nand [SALES_TYPE] <> "4 - Renewal"\n then [UNIT] * 361\nelse 0),\n #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Banklink in $ (*$148)", each [Banklink Units] * 148)\nin\n #"Added Custom2"' + parse_tree: Tree = m_parser.parse_expression(expression) + assert m_parser.get_output_dataset(parse_tree) == '"Added Custom2"' + + +def test_parse_m_query3(): + expression: str = 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "select #(lf)UPPER(REPLACE(AGENT_NAME,\'-\',\'\')) AS Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = \'Accounting\'#(lf)and TARGET_TEAM = \'Enterprise\'", null, [EnableFolding=true]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Has PS Software Quota?", each if [TIER] = "Expansion (Medium)" then "Yes" else if [TIER] = "Acquisition" then "Yes" else "No")\nin\n #"Added Conditional Column"' + parse_tree: Tree = m_parser.parse_expression(expression) + assert m_parser.get_output_dataset(parse_tree) == '"Added Conditional Column"' + + +def test_parse_m_query4(): + expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *#(lf),concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), MONTHID) as AGENT_KEY#(lf),concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY#(lf)#(lf)from V_OIP_ENT_2022"]),\n #"Added Custom" = Table.AddColumn(Source, "OIP in $(*$350)", each [SALES_INVOICE_AMOUNT] * 350),\n #"Changed Type" = Table.TransformColumnTypes(#"Added Custom",{{"OIP in $(*$350)", type number}})\nin\n #"Changed Type"' + parse_tree: Tree = m_parser.parse_expression(expression) + assert m_parser.get_output_dataset(parse_tree) == '"Changed Type"' + + +def test_parse_m_query5(): + expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select *,#(lf)#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS CD_AGENT_KEY#(lf)#(lf)from V_INVOICE_BOOKING_2022"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"CLIENT_ID", Int64.Type}}),\n #"Added Conditional Column" = Table.AddColumn(#"Changed Type", "PS Software (One-Off)", each if Text.Contains([REVENUE_TYPE], "Software") then [Inv_Amt] else if Text.Contains([REVENUE_TYPE], "Tax Seminar") then [Inv_Amt] else 0),\n #"Filtered Rows" = Table.SelectRows(#"Added Conditional Column", each true),\n #"Duplicated Column" = Table.DuplicateColumn(#"Filtered Rows", 
"CLIENT_ID", "CLIENT_ID - Copy"),\n #"Changed Type1" = Table.TransformColumnTypes(#"Duplicated Column",{{"CLIENT_ID - Copy", type text}}),\n #"Renamed Columns" = Table.RenameColumns(#"Changed Type1",{{"CLIENT_ID - Copy", "CLIENT_ID for Filter"}})\nin\n #"Renamed Columns"' + parse_tree: Tree = m_parser.parse_expression(expression) + assert m_parser.get_output_dataset(parse_tree) == '"Renamed Columns"' + + +def test_parse_m_query6(): + expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="SELECT *,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), #(lf)LEFT(CAST(MONTH_DATE AS DATE),4)+LEFT(RIGHT(CAST(MONTH_DATE AS DATE),5),2)) AS AGENT_KEY#(lf)#(lf)FROM dbo.V_ARR_ADDS"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_DATE", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([MONTH_DATE]))\nin\n #"Added Custom"' + parse_tree: Tree = m_parser.parse_expression(expression) + assert m_parser.get_output_dataset(parse_tree) == '"Added Custom"' + + +def test_parse_m_query7(): + expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS CLIENT_DIRECTOR,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'#(lf)AND TIER = 'Client Director'\", null, [EnableFolding=true])\nin\n Source" + parse_tree: Tree = m_parser.parse_expression(expression) + assert m_parser.get_output_dataset(parse_tree) == "Source" + + +def test_parse_m_query8(): + expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"mth_date", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([mth_date])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"' + parse_tree: Tree = m_parser.parse_expression(expression) + assert m_parser.get_output_dataset(parse_tree) == '"Added Custom1"' + + +def test_parse_m_query9(): + expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", 
[Query="select#(lf)CLIENT_ID,#(lf)PARTNER_ACCOUNT_NAME,#(lf)CM_CLOSING_MNTH_COUNTRY,#(lf)MONTH_WID,#(lf)PS_DELETES,#(lf)CLIENT_MANAGER_CLOSING_MONTH,#(lf)SME_DELETES,#(lf)TPV_AMV_OPENING,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_TPV_LEADERBOARD", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_WID", type text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "MONTH_DATE", each Date.FromText(\nText.Range([MONTH_WID], 0,4) & "-" &\nText.Range([MONTH_WID], 4,2)\n)),\n #"Added Custom2" = Table.AddColumn(#"Added Custom", "Month", each Date.Month([MONTH_DATE])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom2", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"' + parse_tree: Tree = m_parser.parse_expression(expression) + assert m_parser.get_output_dataset(parse_tree) == '"Added Custom1"' + + +def test_parse_m_query10(): + expression: str = 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","OPERATIONS_ANALYTICS_WAREHOUSE_PROD",[Role="OPERATIONS_ANALYTICS_MEMBER_AD"]),\n OPERATIONS_ANALYTICS_Database = Source{[Name="OPERATIONS_ANALYTICS",Kind="Database"]}[Data],\n TEST_Schema = OPERATIONS_ANALYTICS_Database{[Name="TEST",Kind="Schema"]}[Data],\n LZ_MIGRATION_DOWNLOAD_View = TEST_Schema{[Name="LZ_MIGRATION_DOWNLOAD",Kind="View"]}[Data],\n #"Changed Type" = Table.TransformColumnTypes(LZ_MIGRATION_DOWNLOAD_View,{{"MIGRATION_MONTH_ID", type text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Migration Month", each Date.FromText(\nText.Range([MIGRATION_MONTH_ID], 0,4) & "-" & \nText.Range([MIGRATION_MONTH_ID], 4,2) \n)),\n #"Changed Type1" = Table.TransformColumnTypes(#"Added Custom",{{"Migration Month", type date}})\nin\n #"Changed Type1"' + parse_tree: Tree = m_parser.parse_expression(expression) + assert m_parser.get_output_dataset(parse_tree) == '"Changed Type1"' + + +def test_parse_m_query11(): + expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select *,#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Industries'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true])\nin\n Source" + parse_tree: Tree = m_parser.parse_expression(expression) + assert m_parser.get_output_dataset(parse_tree) == "Source" + + +def test_parse_m_query12(): + expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,\'-\',\'\'))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = \'Software\' and (NOT(PRODUCT in (\'ADV\', \'Adv\') and left(ACCOUNT_ID,2)=\'10\') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = 
\'Manual Adjustment\') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN (\'Recurring\',\'0\') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Services", each if [CLASS] = "Services" then [INVOICE_AMOUNT] else 0),\n #"Added Custom" = Table.AddColumn(#"Added Conditional Column", "Advanced New Sites", each if [PRODUCT] = "ADV"\nor [PRODUCT] = "Adv"\nthen [NEW_SITE]\nelse 0)\nin\n #"Added Custom"' + parse_tree: Tree = m_parser.parse_expression(expression) + assert m_parser.get_output_dataset(parse_tree) == '"Added Custom"' diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index ac608dcce9e9d..40d441b9cbc91 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -153,7 +153,7 @@ def register_mock_api(request_mock): { "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", } - ] + ], } ], } From 3b6a4224a10089786b81392f78d2d9716cf4e81e Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Thu, 24 Nov 2022 22:52:15 +0530 Subject: [PATCH 10/53] debug log --- .../src/datahub/ingestion/source/powerbi/powerbi.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index ec4f25fa784ee..ca41c3d0c429f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -998,9 +998,7 @@ def init_dashboard_tiles(workspace: PowerBiAPI.Workspace) -> None: # Scan is complete lets take the result scan_result = get_scan_result(scan_id=scan_id) - import json - - print(json.dumps(scan_result, indent=1)) + LOGGER.debug(f"scan result = {scan_result}") workspace = PowerBiAPI.Workspace( id=scan_result["id"], name=scan_result["name"], From 90f6870a4393c7be9fe1299e4a4ecc42545b0dc4 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Thu, 1 Dec 2022 07:47:09 +0530 Subject: [PATCH 11/53] WIP --- .../data/template/StringArrayArray.class | Bin 4132 -> 0 bytes .../ingestion/source/powerbi/powerbi.py | 38 +++++++++++++++++- .../integration/powerbi/test_m_parser.py | 7 ++++ 3 files changed, 44 insertions(+), 1 deletion(-) delete mode 100644 metadata-events/mxe-schemas/bin/mainGeneratedDataTemplate/com/linkedin/data/template/StringArrayArray.class diff --git a/metadata-events/mxe-schemas/bin/mainGeneratedDataTemplate/com/linkedin/data/template/StringArrayArray.class b/metadata-events/mxe-schemas/bin/mainGeneratedDataTemplate/com/linkedin/data/template/StringArrayArray.class deleted file mode 100644 index 6dfa8dc0081ea527ef6ffa88a3185dea78c8aa4f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4132 zcmbtWYje{^6g``mD3L`IObF%~Xan^lU|xlS1I0L@xHW{w!$1;)(gg)uJ ze}O;1*A`|7Ei;sFo#~J2^zKTwMIs9*%;4y;_uPBWId@n8{QKuK07vkvgf51(W-ex` zM(u`PF>0BL>Zlnvw;aQ)+2e+-&QoF$`67+pzRXt_fRmRHtDJkeNPEMgT30PwcQF|%iDH`(SJdolRFdqx)+d;RcNh{Kd0C+^ zr+b%zHRvI^ibhR8*I1m>tuj}cAyzaswK}C*2EX?nt|hs&8)wzJci%H6JRl*>uz7V} zURu$x9T_R?QILczQ_c*(+Q%@Q2xzJqk`ifJI3M$_ebYy~(mGW4RD zG!veHCj%p1qcsU1G3;I~eULsfx>4kkoMo5^6+eJlJ9v-sO_kVZT;Ej4cnn(uU|RlS zCqE=%9W#7@ES*OrU%77vnN!}}>_!tI z`_+nFh*zjp%^LYW6Brgmw7{T9dE$_Nqh2>HN3XP&avRPo 
zP_e*pC_H80VM=+WD_6XlBbr%X;)t~EU+0KD^okxaD4MBTnm*ZZ>J1`u%zps1)OxkS z$x67%a5!Xf(?V{xylOkGb|ugj8gCQRI(jE8%d})z*u&4cTMBOD4jo$>1y5S#1Qp^b+LLmf>DV%Ln^A1vQxb>Id#0DtwR%U)34;vSc<$`_l$LNjLb0OVqTNM=)Fp z+f@Rc_Zs^$c2bqy^pV*`A3t=&0|RmXWkpY7^xFsCMU?K&(F*==IL01BxeqDw0FmxT zh{XmUV(1CBKF8aSF!DPs>BesQl|=ymo*TfsbkD;R^nVSyl#_H0@nhb@K1LyoUMZ=k zNL^gU=rZTiY$|eR_9PlyFS6f6arA3+VJ8AfveWsq|yyrrUkLCFtCOb=XSZpxb4KXzdQiLsqhaV>YaFs)?6n8hVZ zLeRL3&xyM}T)|Z@?TB~-M5v43XpM{ABc7%N9`^?0lS1z_x{lCYMR0J6uJ~IywTx@e z!HVfNg6pvTWYYsa3S_UUUWzm54cC)4k5J?rfyhS*`B(?!MS&dQbM8Z~i<)nGx{+$J z`{+;qjp$QYF>jX!<&9d@Q-hjIh?W1nJMQ_#7q}{_I9UV{sUQ#(BA+6 diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index ca41c3d0c429f..a9cf7c40e9d5d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -45,7 +45,7 @@ OwnershipClass, OwnershipTypeClass, StatusClass, - SubTypesClass, + SubTypesClass, UpstreamClass, DatasetLineageTypeClass, UpstreamLineageClass, ) from datahub.utilities.dedup_list import deduplicate_list @@ -999,6 +999,8 @@ def init_dashboard_tiles(workspace: PowerBiAPI.Workspace) -> None: # Scan is complete lets take the result scan_result = get_scan_result(scan_id=scan_id) LOGGER.debug(f"scan result = {scan_result}") + import json + print(json.dumps(scan_result, indent=1)) workspace = PowerBiAPI.Workspace( id=scan_result["id"], name=scan_result["name"], @@ -1110,6 +1112,40 @@ def __to_datahub_dataset( aspect_name=Constant.STATUS, aspect=StatusClass(removed=False), ) + if table.name == 'two_source_table': + upstreams: List[UpstreamClass] = [] + upstream_urn = builder.make_dataset_urn_with_platform_instance( + "snowflake", + "GSL_TEST_DB.PUBLIC.SALES_ANALYST_VIEW", + "GSL_TEST_WH", + ) + + upstream_table = UpstreamClass( + upstream_urn, + DatasetLineageTypeClass.TRANSFORMED, + ) + + upstreams.append(upstream_table) + + upstream_urn2 = builder.make_dataset_urn( + "postgres", + "mics.public.order_date", + ) + upstream_table2 = UpstreamClass( + upstream_urn2, + DatasetLineageTypeClass.TRANSFORMED, + ) + upstreams.append(upstream_table2) + + upstream_lineage = UpstreamLineageClass(upstreams=upstreams) + mcp = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=ds_urn, + aspect=upstream_lineage, + ) + + dataset_mcps.extend([mcp]) dataset_mcps.extend([info_mcp, status_mcp]) diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 29497bdd122c2..8a2a7ca01b50b 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -73,3 +73,10 @@ def test_parse_m_query12(): expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,\'-\',\'\'))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = \'Software\' and (NOT(PRODUCT in (\'ADV\', \'Adv\') and left(ACCOUNT_ID,2)=\'10\') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = \'Manual Adjustment\') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN (\'Recurring\',\'0\') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Added Conditional 
Column" = Table.AddColumn(Source, "Services", each if [CLASS] = "Services" then [INVOICE_AMOUNT] else 0),\n #"Added Custom" = Table.AddColumn(#"Added Conditional Column", "Advanced New Sites", each if [PRODUCT] = "ADV"\nor [PRODUCT] = "Adv"\nthen [NEW_SITE]\nelse 0)\nin\n #"Added Custom"' parse_tree: Tree = m_parser.parse_expression(expression) assert m_parser.get_output_dataset(parse_tree) == '"Added Custom"' + + +def test_parse_m_query13(): + expression: str = 'let\n Source = Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]),\n Source2 = PostgreSQL.Database(\"localhost\", \"mics\"),\n public_order_date = Source2{[Schema=\"public\",Item=\"order_date\"]}[Data],\n GSL_TEST_DB_Database = Source{[Name=\"GSL_TEST_DB\",Kind=\"Database\"]}[Data],\n PUBLIC_Schema = GSL_TEST_DB_Database{[Name=\"PUBLIC\",Kind=\"Schema\"]}[Data],\n SALES_ANALYST_VIEW_View = PUBLIC_Schema{[Name=\"SALES_ANALYST_VIEW\",Kind=\"View\"]}[Data],\n two_source_table = Table.Combine({public_order_date, SALES_ANALYST_VIEW_View})\n in\n two_source_table' + parse_tree: Tree = m_parser.parse_expression(expression) + assert m_parser.get_output_dataset(parse_tree) == 'two_source_table' + From 43f954a88c2c3ce1942e685126447442677e6a8b Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Fri, 2 Dec 2022 08:54:05 +0530 Subject: [PATCH 12/53] WIP --- .../ingestion/source/powerbi/m_parser.py | 65 ++++++- .../ingestion/source/powerbi/powerbi.py | 139 +++++--------- .../integration/powerbi/test_m_parser.py | 174 ++++++++++-------- 3 files changed, 202 insertions(+), 176 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index 28d940ed12e37..a94744194b237 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -1,13 +1,22 @@ +from dataclasses import dataclass import importlib.resources as pkg_resource +from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport import logging -from typing import List, Optional +from typing import List, Optional, Any -from lark import Lark, Tree +from lark import Lark, Tree, Token logger = logging.getLogger(__name__) -def get_output_dataset(root: Tree) -> Optional[str]: +@dataclass +class DataPlatformTable: + name: str + full_name: str + platform_type: str + + +def get_output_variable(root: Tree) -> Optional[str]: def get_token_list_for_any(tree: Tree, rules: List[str]) -> List[Tree]: for rule in rules: token_list = [x for x in tree.find_data(rule)] @@ -26,14 +35,62 @@ def get_token_list_for_any(tree: Tree, rules: List[str]) -> List[Tree]: def parse_expression(expression: str) -> Tree: + # Read lexical grammar as text grammar: str = pkg_resource.read_text( "datahub.ingestion.source.powerbi", "powerbi-lexical-grammar.rule" ) + + # Create lark parser for the grammar text lark_parser = Lark(grammar, start="let_expression", regex=True) parse_tree: Tree = lark_parser.parse(expression) logger.debug("Parse Tree") - logger.debug(parse_tree.pretty()) + if logger.level == logging.DEBUG: # Guard condition to avoid heavy pretty() function call + logger.debug(parse_tree.pretty()) return parse_tree + + +def get_upstream_tables(expression, reporter: PowerBiDashboardSourceReport) -> List[DataPlatformTable]: + parse_tree = parse_expression(expression) + + output_variable = get_output_variable(parse_tree) + + filter: Any = 
parse_tree.find_data("variable") + + def find_variable(node: Tree, variable: str) -> bool: + for internal_child in node.children: + if isinstance(internal_child, Token): + if internal_child.value == variable: + return True + continue + return find_variable(internal_child, variable) + + return False + + for tree in filter: + if find_variable(tree, output_variable): + print("Mohd1") + print(tree.pretty()) + for node in tree.find_data("field_selection"): + print("Mohd2") + print(node) + + return [ + DataPlatformTable( + name="postgres_table", + full_name="book.public.test", + platform_type="PostgreSql" + ), + DataPlatformTable( + name="oracle_table", + full_name="book.public.test", + platform_type="Oracle" + ), + DataPlatformTable( + name="snowflake_table", + full_name="book.public.test", + platform_type="Snowflake" + ), + ] diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index a9cf7c40e9d5d..1a29c933bb580 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -5,19 +5,18 @@ ######################################################### import logging -from dataclasses import dataclass, field as dataclass_field from enum import Enum from time import sleep -from typing import Any, Dict, Iterable, List, Optional, Tuple +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast from xmlrpc.client import Boolean import msal -import pydantic import requests import datahub.emitter.mce_builder as builder +from dataclasses import dataclass from datahub.configuration.common import ConfigurationError -from datahub.configuration.source_common import EnvBasedSourceConfigBase +from datahub.configuration.source_common import DEFAULT_ENV from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( @@ -30,6 +29,7 @@ ) from datahub.ingestion.api.source import Source, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.powerbi.m_parser import DataPlatformTable from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps from datahub.metadata.schema_classes import ( BrowsePathsClass, @@ -48,6 +48,8 @@ SubTypesClass, UpstreamClass, DatasetLineageTypeClass, UpstreamLineageClass, ) from datahub.utilities.dedup_list import deduplicate_list +from datahub.ingestion.source.powerbi import m_parser +from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport, PowerBiDashboardSourceConfig, PowerBiAPIConfig, PlatformDetail # Logger instance LOGGER = logging.getLogger(__name__) @@ -105,61 +107,6 @@ class Constant: HTTP_RESPONSE_STATUS_CODE = "HttpResponseStatusCode" -@dataclass -class PowerBiDashboardSourceReport(SourceReport): - dashboards_scanned: int = 0 - charts_scanned: int = 0 - filtered_dashboards: List[str] = dataclass_field(default_factory=list) - filtered_charts: List[str] = dataclass_field(default_factory=list) - - def report_dashboards_scanned(self, count: int = 1) -> None: - self.dashboards_scanned += count - - def report_charts_scanned(self, count: int = 1) -> None: - self.charts_scanned += count - - def report_dashboards_dropped(self, model: str) -> None: - self.filtered_dashboards.append(model) - - def report_charts_dropped(self, view: str) -> None: - self.filtered_charts.append(view) - - -class 
PowerBiAPIConfig(EnvBasedSourceConfigBase): - # Organsation Identifier - tenant_id: str = pydantic.Field(description="PowerBI tenant identifier") - # PowerBi workspace identifier - workspace_id: str = pydantic.Field(description="PowerBI workspace identifier") - # Dataset type mapping - dataset_type_mapping: Dict[str, str] = pydantic.Field( - description="Mapping of PowerBI datasource type to DataHub supported data-sources. See Quickstart Recipe for mapping" - ) - # Azure app client identifier - client_id: str = pydantic.Field(description="Azure app client identifier") - # Azure app client secret - client_secret: str = pydantic.Field(description="Azure app client secret") - # timeout for meta-data scanning - scan_timeout: int = pydantic.Field( - default=60, description="timeout for PowerBI metadata scanning" - ) - # Enable/Disable extracting ownership information of Dashboard - extract_ownership: bool = pydantic.Field( - default=True, description="Whether ownership should be ingested" - ) - # Enable/Disable extracting report information - extract_reports: bool = pydantic.Field( - default=True, description="Whether reports should be ingested" - ) - - -class PowerBiDashboardSourceConfig(PowerBiAPIConfig): - platform_name: str = "powerbi" - platform_urn: str = builder.make_data_platform_urn(platform=platform_name) - # Not supporting the pattern - # dashboard_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() - # chart_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() - - class PowerBiAPI: # API endpoints of PowerBi to fetch dashboards, tiles, datasets API_ENDPOINTS = { @@ -215,21 +162,21 @@ def __eq__(self, instance): def __hash__(self): return hash(self.__members()) + @dataclass + class Table: + name: str + full_name: str + expression: Optional[str] + # dataclasses for PowerBi Dashboard @dataclass class PowerBIDataset: - @dataclass - class Table: - name: str - full_name: str - expression: Optional[str] - id: str name: str webUrl: Optional[str] workspace_id: str # Table in datasets - tables: List["Table"] + tables: List["PowerBiAPI.Table"] def get_urn_part(self): return f"datasets.{self.id}" @@ -1034,8 +981,9 @@ def __eq__(self, instance): def __hash__(self): return id(self.id) - def __init__(self, config: PowerBiDashboardSourceConfig): + def __init__(self, config: PowerBiDashboardSourceConfig, reporter: PowerBiDashboardSourceReport): self.__config = config + self.__reporter = reporter def new_mcp( self, @@ -1112,40 +1060,43 @@ def __to_datahub_dataset( aspect_name=Constant.STATUS, aspect=StatusClass(removed=False), ) - if table.name == 'two_source_table': - upstreams: List[UpstreamClass] = [] + # Check if upstreams table is available, parse them and create dataset URN for each upstream table + upstreams: List[UpstreamClass] = [] + upstream_tables: List[DataPlatformTable] = m_parser.get_upstream_tables(table.expression, self.__reporter) + for upstream_table in upstream_tables: + platform: Union[str, PlatformDetail] = self.__config.dataset_type_mapping[upstream_table.platform_type] + platform_name: str = None + platform_instance_name: str = None + platform_env: str = DEFAULT_ENV + # Determine if PlatformDetail is provided + if isinstance(platform, PlatformDetail): + platform_name = cast(PlatformDetail, platform).platform + platform_instance_name = cast(PlatformDetail, platform).platform_instance + platform_env = cast(PlatformDetail, platform).env + else: + platform_name = platform + upstream_urn = builder.make_dataset_urn_with_platform_instance( - "snowflake", - 
"GSL_TEST_DB.PUBLIC.SALES_ANALYST_VIEW", - "GSL_TEST_WH", + platform=platform_name, + platform_instance=platform_instance_name, + env=platform_env, + name=upstream_table.full_name, ) - upstream_table = UpstreamClass( upstream_urn, DatasetLineageTypeClass.TRANSFORMED, ) - upstreams.append(upstream_table) - upstream_urn2 = builder.make_dataset_urn( - "postgres", - "mics.public.order_date", - ) - upstream_table2 = UpstreamClass( - upstream_urn2, - DatasetLineageTypeClass.TRANSFORMED, - ) - upstreams.append(upstream_table2) - - upstream_lineage = UpstreamLineageClass(upstreams=upstreams) - mcp = MetadataChangeProposalWrapper( - entityType="dataset", - changeType=ChangeTypeClass.UPSERT, - entityUrn=ds_urn, - aspect=upstream_lineage, - ) - - dataset_mcps.extend([mcp]) + if len(upstreams) > 0: + upstream_lineage = UpstreamLineageClass(upstreams=upstreams) + mcp = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=ds_urn, + aspect=upstream_lineage, + ) + dataset_mcps.extend([mcp]) dataset_mcps.extend([info_mcp, status_mcp]) @@ -1681,7 +1632,7 @@ def __init__(self, config: PowerBiDashboardSourceConfig, ctx: PipelineContext): self.reporter = PowerBiDashboardSourceReport() self.auth_token = PowerBiAPI(self.source_config).get_access_token() self.powerbi_client = PowerBiAPI(self.source_config) - self.mapper = Mapper(config) + self.mapper = Mapper(config, self.reporter) @classmethod def create(cls, config_dict, ctx): diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 8a2a7ca01b50b..0ffa9e635f43f 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -2,81 +2,99 @@ from datahub.ingestion.source.powerbi import m_parser - -def test_parse_m_query1(): - expression: str = 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table' - parse_tree: Tree = m_parser.parse_expression(expression) - assert m_parser.get_output_dataset(parse_tree) == "TESTTABLE_Table" - - -def test_parse_m_query2(): - expression: str = 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "SELECT#(lf)concat((UPPER(REPLACE(SELLER,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", null, [EnableFolding=true]),\n #"ADDed Conditional Column" = Table.AddColumn(Source, "SME Units ENT", each if [DEAL_TYPE] = "SME Unit" then [UNIT] else 0),\n #"Added Conditional Column1" = Table.AddColumn(#"Added Conditional Column", "Banklink Units", each if [DEAL_TYPE] = "Banklink" then [UNIT] else 0),\n #"Removed Columns" = Table.RemoveColumns(#"Added Conditional Column1",{"Banklink Units"}),\n #"Added Custom" = Table.AddColumn(#"Removed Columns", "Banklink Units", each if [DEAL_TYPE] = "Banklink" and [SALES_TYPE] = "3 - Upsell"\nthen [UNIT]\n\nelse if [SALES_TYPE] = "Adjusted BL Migration"\nthen [UNIT]\n\nelse 
0),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "SME Units in $ (*$361)", each if [DEAL_TYPE] = "SME Unit" \nand [SALES_TYPE] <> "4 - Renewal"\n then [UNIT] * 361\nelse 0),\n #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Banklink in $ (*$148)", each [Banklink Units] * 148)\nin\n #"Added Custom2"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert m_parser.get_output_dataset(parse_tree) == '"Added Custom2"' - - -def test_parse_m_query3(): - expression: str = 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "select #(lf)UPPER(REPLACE(AGENT_NAME,\'-\',\'\')) AS Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = \'Accounting\'#(lf)and TARGET_TEAM = \'Enterprise\'", null, [EnableFolding=true]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Has PS Software Quota?", each if [TIER] = "Expansion (Medium)" then "Yes" else if [TIER] = "Acquisition" then "Yes" else "No")\nin\n #"Added Conditional Column"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert m_parser.get_output_dataset(parse_tree) == '"Added Conditional Column"' - - -def test_parse_m_query4(): - expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *#(lf),concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), MONTHID) as AGENT_KEY#(lf),concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY#(lf)#(lf)from V_OIP_ENT_2022"]),\n #"Added Custom" = Table.AddColumn(Source, "OIP in $(*$350)", each [SALES_INVOICE_AMOUNT] * 350),\n #"Changed Type" = Table.TransformColumnTypes(#"Added Custom",{{"OIP in $(*$350)", type number}})\nin\n #"Changed Type"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert m_parser.get_output_dataset(parse_tree) == '"Changed Type"' - - -def test_parse_m_query5(): - expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select *,#(lf)#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS CD_AGENT_KEY#(lf)#(lf)from V_INVOICE_BOOKING_2022"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"CLIENT_ID", Int64.Type}}),\n #"Added Conditional Column" = Table.AddColumn(#"Changed Type", "PS Software (One-Off)", each if Text.Contains([REVENUE_TYPE], "Software") then [Inv_Amt] else if Text.Contains([REVENUE_TYPE], "Tax Seminar") then [Inv_Amt] else 0),\n #"Filtered Rows" = Table.SelectRows(#"Added Conditional Column", each true),\n #"Duplicated Column" = Table.DuplicateColumn(#"Filtered Rows", "CLIENT_ID", "CLIENT_ID - Copy"),\n #"Changed Type1" = Table.TransformColumnTypes(#"Duplicated Column",{{"CLIENT_ID - Copy", type text}}),\n #"Renamed Columns" = Table.RenameColumns(#"Changed Type1",{{"CLIENT_ID - Copy", "CLIENT_ID for Filter"}})\nin\n #"Renamed Columns"' - parse_tree: Tree = 
m_parser.parse_expression(expression) - assert m_parser.get_output_dataset(parse_tree) == '"Renamed Columns"' - - -def test_parse_m_query6(): - expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="SELECT *,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), #(lf)LEFT(CAST(MONTH_DATE AS DATE),4)+LEFT(RIGHT(CAST(MONTH_DATE AS DATE),5),2)) AS AGENT_KEY#(lf)#(lf)FROM dbo.V_ARR_ADDS"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_DATE", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([MONTH_DATE]))\nin\n #"Added Custom"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert m_parser.get_output_dataset(parse_tree) == '"Added Custom"' - - -def test_parse_m_query7(): - expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS CLIENT_DIRECTOR,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'#(lf)AND TIER = 'Client Director'\", null, [EnableFolding=true])\nin\n Source" - parse_tree: Tree = m_parser.parse_expression(expression) - assert m_parser.get_output_dataset(parse_tree) == "Source" - - -def test_parse_m_query8(): - expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"mth_date", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([mth_date])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert m_parser.get_output_dataset(parse_tree) == '"Added Custom1"' - - -def test_parse_m_query9(): - expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select#(lf)CLIENT_ID,#(lf)PARTNER_ACCOUNT_NAME,#(lf)CM_CLOSING_MNTH_COUNTRY,#(lf)MONTH_WID,#(lf)PS_DELETES,#(lf)CLIENT_MANAGER_CLOSING_MONTH,#(lf)SME_DELETES,#(lf)TPV_AMV_OPENING,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_TPV_LEADERBOARD", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_WID", type text}}),\n #"Added Custom" = 
Table.AddColumn(#"Changed Type", "MONTH_DATE", each Date.FromText(\nText.Range([MONTH_WID], 0,4) & "-" &\nText.Range([MONTH_WID], 4,2)\n)),\n #"Added Custom2" = Table.AddColumn(#"Added Custom", "Month", each Date.Month([MONTH_DATE])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom2", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert m_parser.get_output_dataset(parse_tree) == '"Added Custom1"' - - -def test_parse_m_query10(): - expression: str = 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","OPERATIONS_ANALYTICS_WAREHOUSE_PROD",[Role="OPERATIONS_ANALYTICS_MEMBER_AD"]),\n OPERATIONS_ANALYTICS_Database = Source{[Name="OPERATIONS_ANALYTICS",Kind="Database"]}[Data],\n TEST_Schema = OPERATIONS_ANALYTICS_Database{[Name="TEST",Kind="Schema"]}[Data],\n LZ_MIGRATION_DOWNLOAD_View = TEST_Schema{[Name="LZ_MIGRATION_DOWNLOAD",Kind="View"]}[Data],\n #"Changed Type" = Table.TransformColumnTypes(LZ_MIGRATION_DOWNLOAD_View,{{"MIGRATION_MONTH_ID", type text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Migration Month", each Date.FromText(\nText.Range([MIGRATION_MONTH_ID], 0,4) & "-" & \nText.Range([MIGRATION_MONTH_ID], 4,2) \n)),\n #"Changed Type1" = Table.TransformColumnTypes(#"Added Custom",{{"Migration Month", type date}})\nin\n #"Changed Type1"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert m_parser.get_output_dataset(parse_tree) == '"Changed Type1"' - - -def test_parse_m_query11(): - expression: str = "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select *,#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Industries'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true])\nin\n Source" - parse_tree: Tree = m_parser.parse_expression(expression) - assert m_parser.get_output_dataset(parse_tree) == "Source" - - -def test_parse_m_query12(): - expression: str = 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,\'-\',\'\'))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = \'Software\' and (NOT(PRODUCT in (\'ADV\', \'Adv\') and left(ACCOUNT_ID,2)=\'10\') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = \'Manual Adjustment\') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN (\'Recurring\',\'0\') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Services", each if [CLASS] = "Services" then [INVOICE_AMOUNT] else 0),\n #"Added Custom" = Table.AddColumn(#"Added Conditional Column", "Advanced 
New Sites", each if [PRODUCT] = "ADV"\nor [PRODUCT] = "Adv"\nthen [NEW_SITE]\nelse 0)\nin\n #"Added Custom"' - parse_tree: Tree = m_parser.parse_expression(expression) - assert m_parser.get_output_dataset(parse_tree) == '"Added Custom"' - - -def test_parse_m_query13(): - expression: str = 'let\n Source = Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]),\n Source2 = PostgreSQL.Database(\"localhost\", \"mics\"),\n public_order_date = Source2{[Schema=\"public\",Item=\"order_date\"]}[Data],\n GSL_TEST_DB_Database = Source{[Name=\"GSL_TEST_DB\",Kind=\"Database\"]}[Data],\n PUBLIC_Schema = GSL_TEST_DB_Database{[Name=\"PUBLIC\",Kind=\"Schema\"]}[Data],\n SALES_ANALYST_VIEW_View = PUBLIC_Schema{[Name=\"SALES_ANALYST_VIEW\",Kind=\"View\"]}[Data],\n two_source_table = Table.Combine({public_order_date, SALES_ANALYST_VIEW_View})\n in\n two_source_table' - parse_tree: Tree = m_parser.parse_expression(expression) - assert m_parser.get_output_dataset(parse_tree) == 'two_source_table' - +M_QUERIES = [ + 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table', + 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "SELECT#(lf)concat((UPPER(REPLACE(SELLER,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", null, [EnableFolding=true]),\n #"ADDed Conditional Column" = Table.AddColumn(Source, "SME Units ENT", each if [DEAL_TYPE] = "SME Unit" then [UNIT] else 0),\n #"Added Conditional Column1" = Table.AddColumn(#"Added Conditional Column", "Banklink Units", each if [DEAL_TYPE] = "Banklink" then [UNIT] else 0),\n #"Removed Columns" = Table.RemoveColumns(#"Added Conditional Column1",{"Banklink Units"}),\n #"Added Custom" = Table.AddColumn(#"Removed Columns", "Banklink Units", each if [DEAL_TYPE] = "Banklink" and [SALES_TYPE] = "3 - Upsell"\nthen [UNIT]\n\nelse if [SALES_TYPE] = "Adjusted BL Migration"\nthen [UNIT]\n\nelse 0),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "SME Units in $ (*$361)", each if [DEAL_TYPE] = "SME Unit" \nand [SALES_TYPE] <> "4 - Renewal"\n then [UNIT] * 361\nelse 0),\n #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Banklink in $ (*$148)", each [Banklink Units] * 148)\nin\n #"Added Custom2"', + 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "select #(lf)UPPER(REPLACE(AGENT_NAME,\'-\',\'\')) AS Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 
\'Accounting\'#(lf)and TARGET_TEAM = \'Enterprise\'", null, [EnableFolding=true]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Has PS Software Quota?", each if [TIER] = "Expansion (Medium)" then "Yes" else if [TIER] = "Acquisition" then "Yes" else "No")\nin\n #"Added Conditional Column"', + 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *#(lf),concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), MONTHID) as AGENT_KEY#(lf),concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY#(lf)#(lf)from V_OIP_ENT_2022"]),\n #"Added Custom" = Table.AddColumn(Source, "OIP in $(*$350)", each [SALES_INVOICE_AMOUNT] * 350),\n #"Changed Type" = Table.TransformColumnTypes(#"Added Custom",{{"OIP in $(*$350)", type number}})\nin\n #"Changed Type"', + 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select *,#(lf)#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS CD_AGENT_KEY#(lf)#(lf)from V_INVOICE_BOOKING_2022"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"CLIENT_ID", Int64.Type}}),\n #"Added Conditional Column" = Table.AddColumn(#"Changed Type", "PS Software (One-Off)", each if Text.Contains([REVENUE_TYPE], "Software") then [Inv_Amt] else if Text.Contains([REVENUE_TYPE], "Tax Seminar") then [Inv_Amt] else 0),\n #"Filtered Rows" = Table.SelectRows(#"Added Conditional Column", each true),\n #"Duplicated Column" = Table.DuplicateColumn(#"Filtered Rows", "CLIENT_ID", "CLIENT_ID - Copy"),\n #"Changed Type1" = Table.TransformColumnTypes(#"Duplicated Column",{{"CLIENT_ID - Copy", type text}}),\n #"Renamed Columns" = Table.RenameColumns(#"Changed Type1",{{"CLIENT_ID - Copy", "CLIENT_ID for Filter"}})\nin\n #"Renamed Columns"', + 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="SELECT *,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), #(lf)LEFT(CAST(MONTH_DATE AS DATE),4)+LEFT(RIGHT(CAST(MONTH_DATE AS DATE),5),2)) AS AGENT_KEY#(lf)#(lf)FROM dbo.V_ARR_ADDS"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_DATE", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([MONTH_DATE]))\nin\n #"Added Custom"', + "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS CLIENT_DIRECTOR,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'#(lf)AND TIER = 'Client Director'\", null, [EnableFolding=true])\nin\n Source", + 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from 
V_PS_CD_RETENTION", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"mth_date", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([mth_date])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"', + 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select#(lf)CLIENT_ID,#(lf)PARTNER_ACCOUNT_NAME,#(lf)CM_CLOSING_MNTH_COUNTRY,#(lf)MONTH_WID,#(lf)PS_DELETES,#(lf)CLIENT_MANAGER_CLOSING_MONTH,#(lf)SME_DELETES,#(lf)TPV_AMV_OPENING,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_TPV_LEADERBOARD", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_WID", type text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "MONTH_DATE", each Date.FromText(\nText.Range([MONTH_WID], 0,4) & "-" &\nText.Range([MONTH_WID], 4,2)\n)),\n #"Added Custom2" = Table.AddColumn(#"Added Custom", "Month", each Date.Month([MONTH_DATE])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom2", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"', + 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","OPERATIONS_ANALYTICS_WAREHOUSE_PROD",[Role="OPERATIONS_ANALYTICS_MEMBER_AD"]),\n OPERATIONS_ANALYTICS_Database = Source{[Name="OPERATIONS_ANALYTICS",Kind="Database"]}[Data],\n TEST_Schema = OPERATIONS_ANALYTICS_Database{[Name="TEST",Kind="Schema"]}[Data],\n LZ_MIGRATION_DOWNLOAD_View = TEST_Schema{[Name="LZ_MIGRATION_DOWNLOAD",Kind="View"]}[Data],\n #"Changed Type" = Table.TransformColumnTypes(LZ_MIGRATION_DOWNLOAD_View,{{"MIGRATION_MONTH_ID", type text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Migration Month", each Date.FromText(\nText.Range([MIGRATION_MONTH_ID], 0,4) & "-" & \nText.Range([MIGRATION_MONTH_ID], 4,2) \n)),\n #"Changed Type1" = Table.TransformColumnTypes(#"Added Custom",{{"Migration Month", type date}})\nin\n #"Changed Type1"', + "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select *,#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Industries'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true])\nin\n Source", + 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", 
[Query="Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,\'-\',\'\'))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = \'Software\' and (NOT(PRODUCT in (\'ADV\', \'Adv\') and left(ACCOUNT_ID,2)=\'10\') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = \'Manual Adjustment\') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN (\'Recurring\',\'0\') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Services", each if [CLASS] = "Services" then [INVOICE_AMOUNT] else 0),\n #"Added Custom" = Table.AddColumn(#"Added Conditional Column", "Advanced New Sites", each if [PRODUCT] = "ADV"\nor [PRODUCT] = "Adv"\nthen [NEW_SITE]\nelse 0)\nin\n #"Added Custom"', + 'let\n Source = Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]),\n Source2 = PostgreSQL.Database(\"localhost\", \"mics\"),\n public_order_date = Source2{[Schema=\"public\",Item=\"order_date\"]}[Data],\n GSL_TEST_DB_Database = Source{[Name=\"GSL_TEST_DB\",Kind=\"Database\"]}[Data],\n PUBLIC_Schema = GSL_TEST_DB_Database{[Name=\"PUBLIC\",Kind=\"Schema\"]}[Data],\n SALES_ANALYST_VIEW_View = PUBLIC_Schema{[Name=\"SALES_ANALYST_VIEW\",Kind=\"View\"]}[Data],\n two_source_table = Table.Combine({public_order_date, SALES_ANALYST_VIEW_View})\n in\n two_source_table', +] + + +# def test_parse_m_query1(): +# expression: str = M_QUERIES[0] +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert m_parser.get_output_variable(parse_tree) == "TESTTABLE_Table" +# +# +# def test_parse_m_query2(): +# expression: str = M_QUERIES[1] +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert m_parser.get_output_variable(parse_tree) == '"Added Custom2"' +# +# +# def test_parse_m_query3(): +# expression: str = M_QUERIES[2] +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert m_parser.get_output_variable(parse_tree) == '"Added Conditional Column"' +# +# +# def test_parse_m_query4(): +# expression: str = M_QUERIES[3] +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert m_parser.get_output_variable(parse_tree) == '"Changed Type"' +# +# +# def test_parse_m_query5(): +# expression: str = M_QUERIES[4] +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert m_parser.get_output_variable(parse_tree) == '"Renamed Columns"' +# +# +# def test_parse_m_query6(): +# expression: str = M_QUERIES[5] +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert m_parser.get_output_variable(parse_tree) == '"Added Custom"' +# +# +# def test_parse_m_query7(): +# expression: str = M_QUERIES[6] +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert m_parser.get_output_variable(parse_tree) == "Source" +# +# +# def test_parse_m_query8(): +# expression: str = M_QUERIES[7] +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert m_parser.get_output_variable(parse_tree) == '"Added Custom1"' +# +# +# def test_parse_m_query9(): +# expression: str = M_QUERIES[8] +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert m_parser.get_output_variable(parse_tree) == '"Added Custom1"' +# +# +# def test_parse_m_query10(): +# expression: str = M_QUERIES[9] +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert m_parser.get_output_variable(parse_tree) == '"Changed Type1"' +# +# +# def 
test_parse_m_query11(): +# expression: str = M_QUERIES[10] +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert m_parser.get_output_variable(parse_tree) == "Source" +# +# +# def test_parse_m_query12(): +# expression: str = M_QUERIES[11] +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert m_parser.get_output_variable(parse_tree) == '"Added Custom"' +# +# +# def test_parse_m_query13(): +# expression: str = M_QUERIES[12] +# parse_tree: Tree = m_parser.parse_expression(expression) +# assert m_parser.get_output_variable(parse_tree) == 'two_source_table' + +def test_get_upstream(): + m_parser.get_upstream_tables(M_QUERIES[0], None) From fe7c50573baeaed4ee3988d7ab88e5f620b77a75 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 7 Dec 2022 13:46:25 +0530 Subject: [PATCH 13/53] config --- .../ingestion/source/powerbi/config.py | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py new file mode 100644 index 0000000000000..363aedfeef9b9 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -0,0 +1,74 @@ +import pydantic +import datahub.emitter.mce_builder as builder + +from dataclasses import field as dataclass_field +from typing import List + +from dataclasses import dataclass +from datahub.configuration.source_common import EnvBasedSourceConfigBase, DEFAULT_ENV +from typing import Dict, Union +from datahub.ingestion.api.source import SourceReport + + +@dataclass +class PowerBiDashboardSourceReport(SourceReport): + dashboards_scanned: int = 0 + charts_scanned: int = 0 + filtered_dashboards: List[str] = dataclass_field(default_factory=list) + filtered_charts: List[str] = dataclass_field(default_factory=list) + + def report_dashboards_scanned(self, count: int = 1) -> None: + self.dashboards_scanned += count + + def report_charts_scanned(self, count: int = 1) -> None: + self.charts_scanned += count + + def report_dashboards_dropped(self, model: str) -> None: + self.filtered_dashboards.append(model) + + def report_charts_dropped(self, view: str) -> None: + self.filtered_charts.append(view) + + +@dataclass +class PlatformDetail: + platform: str = pydantic.Field(description="DataHub platform name. Example postgres or oracle or snowflake") + platform_instance: str = pydantic.Field(default=None, description="DataHub platform instance name. It should be same as you have used in ingestion receipe of DataHub platform ingestion source") + env: str = pydantic.Field( + default=DEFAULT_ENV, + description="The environment that all assets produced by DataHub platform ingestion source belong to", + ) + + +class PowerBiAPIConfig(EnvBasedSourceConfigBase): + # Organisation Identifier + tenant_id: str = pydantic.Field(description="PowerBI tenant identifier") + # PowerBi workspace identifier + workspace_id: str = pydantic.Field(description="PowerBI workspace identifier") + # Dataset type mapping PowerBI support many type of data-sources. Here user need to define what type of PowerBI + # DataSource need to be mapped to corresponding DataHub Platform DataSource. For example PowerBI `Snowflake` is + # mapped to DataHub `snowflake` PowerBI `PostgreSQL` is mapped to DataHub `postgres` and so on. 
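    # A minimal sketch of the field declared just below, assuming its simpler
    # Dict[str, str] shape: PowerBI datasource kinds on the left, DataHub
    # platform names on the right. The richer Dict[str, PlatformDetail] shape
    # maps each key to a PlatformDetail(platform=..., platform_instance=...,
    # env=...) entry instead. The variable name here is hypothetical.
    example_dataset_type_mapping = {
        "PostgreSQL": "postgres",
        "Oracle": "oracle",
        "Snowflake": "snowflake",
    }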
+ dataset_type_mapping: Union[Dict[str, str], Dict[str, PlatformDetail]] = pydantic.Field( + description="Mapping of PowerBI datasource type to DataHub supported data-sources. See Quickstart Recipe for mapping" + ) + # Azure app client identifier + client_id: str = pydantic.Field(description="Azure app client identifier") + # Azure app client secret + client_secret: str = pydantic.Field(description="Azure app client secret") + # timeout for meta-data scanning + scan_timeout: int = pydantic.Field( + default=60, description="timeout for PowerBI metadata scanning" + ) + # Enable/Disable extracting ownership information of Dashboard + extract_ownership: bool = pydantic.Field( + default=True, description="Whether ownership should be ingested" + ) + # Enable/Disable extracting report information + extract_reports: bool = pydantic.Field( + default=True, description="Whether reports should be ingested" + ) + + +class PowerBiDashboardSourceConfig(PowerBiAPIConfig): + platform_name: str = "powerbi" + platform_urn: str = builder.make_data_platform_urn(platform=platform_name) From f31c2e46273e6134316d4ae0aecc8f47bccdf8e9 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 7 Dec 2022 21:52:18 +0530 Subject: [PATCH 14/53] WIP --- .../ingestion/source/powerbi/config.py | 51 + .../ingestion/source/powerbi/m_parser.py | 91 +- .../ingestion/source/powerbi/powerbi.py | 918 +----------------- .../integration/powerbi/test_m_parser.py | 8 +- 4 files changed, 129 insertions(+), 939 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index 363aedfeef9b9..164dfb63ed612 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -9,6 +9,57 @@ from typing import Dict, Union from datahub.ingestion.api.source import SourceReport +class Constant: + """ + keys used in powerbi plugin + """ + + PBIAccessToken = "PBIAccessToken" + DASHBOARD_LIST = "DASHBOARD_LIST" + TILE_LIST = "TILE_LIST" + REPORT_LIST = "REPORT_LIST" + PAGE_BY_REPORT = "PAGE_BY_REPORT" + DATASET_GET = "DATASET_GET" + REPORT_GET = "REPORT_GET" + DATASOURCE_GET = "DATASOURCE_GET" + TILE_GET = "TILE_GET" + ENTITY_USER_LIST = "ENTITY_USER_LIST" + SCAN_CREATE = "SCAN_CREATE" + SCAN_GET = "SCAN_GET" + SCAN_RESULT_GET = "SCAN_RESULT_GET" + Authorization = "Authorization" + WorkspaceId = "WorkspaceId" + DashboardId = "DashboardId" + DatasetId = "DatasetId" + ReportId = "ReportId" + SCAN_ID = "ScanId" + Dataset_URN = "DatasetURN" + CHART_URN = "ChartURN" + CHART = "chart" + CORP_USER = "corpuser" + CORP_USER_INFO = "corpUserInfo" + CORP_USER_KEY = "corpUserKey" + CHART_INFO = "chartInfo" + STATUS = "status" + CHART_ID = "powerbi.linkedin.com/charts/{}" + CHART_KEY = "chartKey" + DASHBOARD_ID = "powerbi.linkedin.com/dashboards/{}" + DASHBOARD = "dashboard" + DASHBOARD_KEY = "dashboardKey" + OWNERSHIP = "ownership" + BROWSERPATH = "browsePaths" + DASHBOARD_INFO = "dashboardInfo" + DATAPLATFORM_INSTANCE = "dataPlatformInstance" + DATASET = "dataset" + DATASET_ID = "powerbi.linkedin.com/datasets/{}" + DATASET_KEY = "datasetKey" + DATASET_PROPERTIES = "datasetProperties" + VALUE = "value" + ENTITY = "ENTITY" + ID = "ID" + HTTP_RESPONSE_TEXT = "HttpResponseText" + HTTP_RESPONSE_STATUS_CODE = "HttpResponseStatusCode" + @dataclass class PowerBiDashboardSourceReport(SourceReport): diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py 
b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index a94744194b237..04d6dd5da69f3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -1,8 +1,11 @@ +from abc import ABC + from dataclasses import dataclass import importlib.resources as pkg_resource from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport +from datahub.ingestion.source.powerbi.proxy import PowerBiAPI import logging -from typing import List, Optional, Any +from typing import List, Optional, Any, Dict from lark import Lark, Tree, Token @@ -16,6 +19,45 @@ class DataPlatformTable: platform_type: str +class AbstractMQueryResolver(ABC): + pass + + +class AbstractDataAccessMQueryResolver(AbstractMQueryResolver, ABC): + pass + + +class PostgresMQueryResolver(AbstractDataAccessMQueryResolver): + pass + + +class OracleMQueryResolver(AbstractDataAccessMQueryResolver): + pass + + +class SnowflakeMQueryResolver(AbstractDataAccessMQueryResolver): + pass + + +class AbstractTableAccessMQueryResolver(AbstractDataAccessMQueryResolver, ABC): + pass + + +class TableCombineMQueryResolver(AbstractTableAccessMQueryResolver): + pass + + +DATA_ACCESS_RESOLVER: Dict[str, AbstractMQueryResolver.__class__] = { + "PostgreSQL.Database": PostgresMQueryResolver, + "Oracle.Database": OracleMQueryResolver, + "Snowflake.Database": SnowflakeMQueryResolver, +} + +TABLE_ACCESS_RESOLVER: Dict[str, AbstractMQueryResolver.__class__] = { + "Table.Combine": TableCombineMQueryResolver, +} + + def get_output_variable(root: Tree) -> Optional[str]: def get_token_list_for_any(tree: Tree, rules: List[str]) -> List[Tree]: for rule in rules: @@ -52,30 +94,35 @@ def parse_expression(expression: str) -> Tree: return parse_tree -def get_upstream_tables(expression, reporter: PowerBiDashboardSourceReport) -> List[DataPlatformTable]: - parse_tree = parse_expression(expression) +def get_upstream_tables(table: PowerBiAPI.Table, reporter: PowerBiDashboardSourceReport) -> List[DataPlatformTable]: + parse_tree = parse_expression(table.expression) output_variable = get_output_variable(parse_tree) - filter: Any = parse_tree.find_data("variable") - - def find_variable(node: Tree, variable: str) -> bool: - for internal_child in node.children: - if isinstance(internal_child, Token): - if internal_child.value == variable: - return True - continue - return find_variable(internal_child, variable) - - return False - - for tree in filter: - if find_variable(tree, output_variable): - print("Mohd1") - print(tree.pretty()) - for node in tree.find_data("field_selection"): - print("Mohd2") - print(node) + filter: Any = parse_tree.find_data("invoke_expression") + tokens: List[Any] = list(filter) + print("Length = {}".format(len(tokens))) + for tree in tokens: + print(tree.pretty()) + + # filter: Any = parse_tree.find_data("variable") + # def find_variable(node: Tree, variable: str) -> bool: + # for internal_child in node.children: + # if isinstance(internal_child, Token): + # if internal_child.value == variable: + # return True + # continue + # return find_variable(internal_child, variable) + # + # return False + # + # for tree in filter: + # if find_variable(tree, output_variable): + # print("Mohd1") + # print(tree.pretty()) + # for node in tree.find_data("field_selection"): + # print("Mohd2") + # print(node) return [ DataPlatformTable( diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py 
b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 1a29c933bb580..e11525c6f0cdd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -5,17 +5,9 @@ ######################################################### import logging -from enum import Enum -from time import sleep from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast -from xmlrpc.client import Boolean - -import msal -import requests import datahub.emitter.mce_builder as builder -from dataclasses import dataclass -from datahub.configuration.common import ConfigurationError from datahub.configuration.source_common import DEFAULT_ENV from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext @@ -49,919 +41,13 @@ ) from datahub.utilities.dedup_list import deduplicate_list from datahub.ingestion.source.powerbi import m_parser -from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport, PowerBiDashboardSourceConfig, PowerBiAPIConfig, PlatformDetail - +from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport, PowerBiDashboardSourceConfig, PlatformDetail +from datahub.ingestion.source.powerbi.proxy import PowerBiAPI # Logger instance LOGGER = logging.getLogger(__name__) -class Constant: - """ - keys used in powerbi plugin - """ - - PBIAccessToken = "PBIAccessToken" - DASHBOARD_LIST = "DASHBOARD_LIST" - TILE_LIST = "TILE_LIST" - REPORT_LIST = "REPORT_LIST" - PAGE_BY_REPORT = "PAGE_BY_REPORT" - DATASET_GET = "DATASET_GET" - REPORT_GET = "REPORT_GET" - DATASOURCE_GET = "DATASOURCE_GET" - TILE_GET = "TILE_GET" - ENTITY_USER_LIST = "ENTITY_USER_LIST" - SCAN_CREATE = "SCAN_CREATE" - SCAN_GET = "SCAN_GET" - SCAN_RESULT_GET = "SCAN_RESULT_GET" - Authorization = "Authorization" - WorkspaceId = "WorkspaceId" - DashboardId = "DashboardId" - DatasetId = "DatasetId" - ReportId = "ReportId" - SCAN_ID = "ScanId" - Dataset_URN = "DatasetURN" - CHART_URN = "ChartURN" - CHART = "chart" - CORP_USER = "corpuser" - CORP_USER_INFO = "corpUserInfo" - CORP_USER_KEY = "corpUserKey" - CHART_INFO = "chartInfo" - STATUS = "status" - CHART_ID = "powerbi.linkedin.com/charts/{}" - CHART_KEY = "chartKey" - DASHBOARD_ID = "powerbi.linkedin.com/dashboards/{}" - DASHBOARD = "dashboard" - DASHBOARD_KEY = "dashboardKey" - OWNERSHIP = "ownership" - BROWSERPATH = "browsePaths" - DASHBOARD_INFO = "dashboardInfo" - DATAPLATFORM_INSTANCE = "dataPlatformInstance" - DATASET = "dataset" - DATASET_ID = "powerbi.linkedin.com/datasets/{}" - DATASET_KEY = "datasetKey" - DATASET_PROPERTIES = "datasetProperties" - VALUE = "value" - ENTITY = "ENTITY" - ID = "ID" - HTTP_RESPONSE_TEXT = "HttpResponseText" - HTTP_RESPONSE_STATUS_CODE = "HttpResponseStatusCode" - - -class PowerBiAPI: - # API endpoints of PowerBi to fetch dashboards, tiles, datasets - API_ENDPOINTS = { - Constant.DASHBOARD_LIST: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/dashboards", - Constant.ENTITY_USER_LIST: "{POWERBI_ADMIN_BASE_URL}/{ENTITY}/{ENTITY_ID}/users", - Constant.TILE_LIST: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/dashboards/{DASHBOARD_ID}/tiles", - Constant.DATASET_GET: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/datasets/{DATASET_ID}", - Constant.DATASOURCE_GET: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/datasets/{DATASET_ID}/datasources", - Constant.REPORT_GET: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/reports/{REPORT_ID}", - Constant.REPORT_LIST: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/reports", - 
Constant.SCAN_GET: "{POWERBI_ADMIN_BASE_URL}/workspaces/scanStatus/{SCAN_ID}", - Constant.SCAN_RESULT_GET: "{POWERBI_ADMIN_BASE_URL}/workspaces/scanResult/{SCAN_ID}", - Constant.SCAN_CREATE: "{POWERBI_ADMIN_BASE_URL}/workspaces/getInfo", - Constant.PAGE_BY_REPORT: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/reports/{REPORT_ID}/pages", - } - - SCOPE: str = "https://analysis.windows.net/powerbi/api/.default" - BASE_URL: str = "https://api.powerbi.com/v1.0/myorg/groups" - ADMIN_BASE_URL: str = "https://api.powerbi.com/v1.0/myorg/admin" - AUTHORITY: str = "https://login.microsoftonline.com/" - - @dataclass - class Workspace: - """ - PowerBi Workspace - """ - - id: str - name: str - state: str - dashboards: List[Any] - datasets: Dict[str, "PowerBiAPI.PowerBIDataset"] - - @dataclass - class DataSource: - """ - PowerBi - """ - - id: str - type: str - raw_connection_detail: Dict - - def __members(self): - return (self.id,) - - def __eq__(self, instance): - return ( - isinstance(instance, PowerBiAPI.DataSource) - and self.__members() == instance.__members() - ) - - def __hash__(self): - return hash(self.__members()) - - @dataclass - class Table: - name: str - full_name: str - expression: Optional[str] - - # dataclasses for PowerBi Dashboard - @dataclass - class PowerBIDataset: - id: str - name: str - webUrl: Optional[str] - workspace_id: str - # Table in datasets - tables: List["PowerBiAPI.Table"] - - def get_urn_part(self): - return f"datasets.{self.id}" - - def __members(self): - return (self.id,) - - def __eq__(self, instance): - return ( - isinstance(instance, PowerBiAPI.PowerBIDataset) - and self.__members() == instance.__members() - ) - - def __hash__(self): - return hash(self.__members()) - - @dataclass - class Page: - id: str - displayName: str - name: str - order: int - - def get_urn_part(self): - return f"pages.{self.id}" - - @dataclass - class User: - id: str - displayName: str - emailAddress: str - graphId: str - principalType: str - - def get_urn_part(self): - return f"users.{self.id}" - - def __members(self): - return (self.id,) - - def __eq__(self, instance): - return ( - isinstance(instance, PowerBiAPI.User) - and self.__members() == instance.__members() - ) - - def __hash__(self): - return hash(self.__members()) - - @dataclass - class Report: - id: str - name: str - webUrl: str - embedUrl: str - description: str - dataset: Optional["PowerBiAPI.PowerBIDataset"] - pages: List["PowerBiAPI.Page"] - users: List["PowerBiAPI.User"] - - def get_urn_part(self): - return f"reports.{self.id}" - - @dataclass - class Tile: - class CreatedFrom(Enum): - REPORT = "Report" - DATASET = "Dataset" - VISUALIZATION = "Visualization" - UNKNOWN = "UNKNOWN" - - id: str - title: str - embedUrl: str - dataset: Optional["PowerBiAPI.PowerBIDataset"] - report: Optional[Any] - createdFrom: CreatedFrom - - def get_urn_part(self): - return f"charts.{self.id}" - - @dataclass - class Dashboard: - id: str - displayName: str - embedUrl: str - webUrl: str - isReadOnly: Any - workspace_id: str - workspace_name: str - tiles: List["PowerBiAPI.Tile"] - users: List["PowerBiAPI.User"] - - def get_urn_part(self): - return f"dashboards.{self.id}" - - def __members(self): - return (self.id,) - - def __eq__(self, instance): - return ( - isinstance(instance, PowerBiAPI.Dashboard) - and self.__members() == instance.__members() - ) - - def __hash__(self): - return hash(self.__members()) - - def __init__(self, config: PowerBiAPIConfig) -> None: - self.__config: PowerBiAPIConfig = config - self.__access_token: str = "" - # Power-Bi Auth 
(Service Principal Auth) - self.__msal_client = msal.ConfidentialClientApplication( - self.__config.client_id, - client_credential=self.__config.client_secret, - authority=PowerBiAPI.AUTHORITY + self.__config.tenant_id, - ) - - # Test connection by generating a access token - LOGGER.info("Trying to connect to {}".format(self.__get_authority_url())) - self.get_access_token() - LOGGER.info("Able to connect to {}".format(self.__get_authority_url())) - - def __get_authority_url(self): - return "{}{}".format(PowerBiAPI.AUTHORITY, self.__config.tenant_id) - - def __get_users(self, workspace_id: str, entity: str, id: str) -> List[User]: - """ - Get user for the given PowerBi entity - """ - users: List[PowerBiAPI.User] = [] - if self.__config.extract_ownership is False: - LOGGER.info( - "ExtractOwnership capabilities is disabled from configuration and hence returning empty users list" - ) - return users - - user_list_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.ENTITY_USER_LIST] - # Replace place holders - user_list_endpoint = user_list_endpoint.format( - POWERBI_ADMIN_BASE_URL=PowerBiAPI.ADMIN_BASE_URL, - ENTITY=entity, - ENTITY_ID=id, - ) - # Hit PowerBi - LOGGER.info(f"Request to URL={user_list_endpoint}") - response = requests.get( - user_list_endpoint, - headers={Constant.Authorization: self.get_access_token()}, - ) - - # Check if we got response from PowerBi - if response.status_code != 200: - LOGGER.warning( - f"Failed to fetch user list from power-bi for, http_status={response.status_code}, message={response.text}" - ) - - LOGGER.info(f"{Constant.WorkspaceId}={workspace_id}") - LOGGER.info(f"{Constant.ENTITY}={entity}") - LOGGER.info(f"{Constant.ID}={id}") - raise ConnectionError("Failed to fetch the user list from the power-bi") - - users_dict: List[Any] = response.json()[Constant.VALUE] - - # Iterate through response and create a list of PowerBiAPI.Dashboard - users = [ - PowerBiAPI.User( - id=instance.get("identifier"), - displayName=instance.get("displayName"), - emailAddress=instance.get("emailAddress"), - graphId=instance.get("graphId"), - principalType=instance.get("principalType"), - ) - for instance in users_dict - ] - - return users - - def __get_report(self, workspace_id: str, report_id: str) -> "PowerBiAPI.Report": - """ - Fetch the report from PowerBi for the given report identifier - """ - if workspace_id is None or report_id is None: - LOGGER.info("Input values are None") - LOGGER.info(f"{Constant.WorkspaceId}={workspace_id}") - LOGGER.info(f"{Constant.ReportId}={report_id}") - return None - - report_get_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.REPORT_GET] - # Replace place holders - report_get_endpoint = report_get_endpoint.format( - POWERBI_BASE_URL=PowerBiAPI.BASE_URL, - WORKSPACE_ID=workspace_id, - REPORT_ID=report_id, - ) - # Hit PowerBi - LOGGER.info(f"Request to report URL={report_get_endpoint}") - response = requests.get( - report_get_endpoint, - headers={Constant.Authorization: self.get_access_token()}, - ) - - # Check if we got response from PowerBi - if response.status_code != 200: - message: str = "Failed to fetch report from power-bi for" - LOGGER.warning(message) - LOGGER.warning(f"{Constant.WorkspaceId}={workspace_id}") - LOGGER.warning(f"{Constant.ReportId}={report_id}") - raise ConnectionError(message) - - response_dict = response.json() - - return PowerBiAPI.Report( - id=response_dict.get("id"), - name=response_dict.get("name"), - webUrl=response_dict.get("webUrl"), - embedUrl=response_dict.get("embedUrl"), - 
description=response_dict.get("description"), - users=[], - pages=[], - dataset=self.get_dataset( - workspace_id=workspace_id, dataset_id=response_dict.get("datasetId") - ), - ) - - def get_access_token(self): - if self.__access_token != "": - LOGGER.info("Returning the cached access token") - return self.__access_token - - LOGGER.info("Generating PowerBi access token") - - auth_response = self.__msal_client.acquire_token_for_client( - scopes=[PowerBiAPI.SCOPE] - ) - - if not auth_response.get("access_token"): - LOGGER.warn( - "Failed to generate the PowerBi access token. Please check input configuration" - ) - raise ConfigurationError( - "Powerbi authorization failed . Please check your input configuration." - ) - - LOGGER.info("Generated PowerBi access token") - - self.__access_token = "Bearer {}".format(auth_response.get("access_token")) - - LOGGER.debug(f"{Constant.PBIAccessToken}={self.__access_token}") - - return self.__access_token - - def get_dashboard_users(self, dashboard: Dashboard) -> List[User]: - """ - Return list of dashboard users - """ - return self.__get_users( - workspace_id=dashboard.workspace_id, entity="dashboards", id=dashboard.id - ) - - def get_dashboards(self, workspace: Workspace) -> List[Dashboard]: - """ - Get the list of dashboard from PowerBi for the given workspace identifier - - TODO: Pagination. As per REST API doc (https://docs.microsoft.com/en-us/rest/api/power-bi/dashboards/get-dashboards), there is no information available on pagination - """ - dashboard_list_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.DASHBOARD_LIST] - # Replace place holders - dashboard_list_endpoint = dashboard_list_endpoint.format( - POWERBI_BASE_URL=PowerBiAPI.BASE_URL, WORKSPACE_ID=workspace.id - ) - # Hit PowerBi - LOGGER.info(f"Request to URL={dashboard_list_endpoint}") - response = requests.get( - dashboard_list_endpoint, - headers={Constant.Authorization: self.get_access_token()}, - ) - - # Check if we got response from PowerBi - if response.status_code != 200: - LOGGER.warning("Failed to fetch dashboard list from power-bi for") - LOGGER.warning(f"{Constant.WorkspaceId}={workspace.id}") - raise ConnectionError( - "Failed to fetch the dashboard list from the power-bi" - ) - - dashboards_dict: List[Any] = response.json()[Constant.VALUE] - - # Iterate through response and create a list of PowerBiAPI.Dashboard - dashboards: List[PowerBiAPI.Dashboard] = [ - PowerBiAPI.Dashboard( - id=instance.get("id"), - isReadOnly=instance.get("isReadOnly"), - displayName=instance.get("displayName"), - embedUrl=instance.get("embedUrl"), - webUrl=instance.get("webUrl"), - workspace_id=workspace.id, - workspace_name=workspace.name, - tiles=[], - users=[], - ) - for instance in dashboards_dict - if instance is not None - ] - - return dashboards - - def get_dataset(self, workspace_id: str, dataset_id: str) -> Any: - """ - Fetch the dataset from PowerBi for the given dataset identifier - """ - if workspace_id is None or dataset_id is None: - LOGGER.info("Input values are None") - LOGGER.info(f"{Constant.WorkspaceId}={workspace_id}") - LOGGER.info(f"{Constant.DatasetId}={dataset_id}") - return None - - dataset_get_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.DATASET_GET] - # Replace place holders - dataset_get_endpoint = dataset_get_endpoint.format( - POWERBI_BASE_URL=PowerBiAPI.BASE_URL, - WORKSPACE_ID=workspace_id, - DATASET_ID=dataset_id, - ) - # Hit PowerBi - LOGGER.info(f"Request to dataset URL={dataset_get_endpoint}") - response = requests.get( - dataset_get_endpoint, - 
headers={Constant.Authorization: self.get_access_token()}, - ) - - # Check if we got response from PowerBi - if response.status_code != 200: - message: str = "Failed to fetch dataset from power-bi for" - LOGGER.warning(message) - LOGGER.warning(f"{Constant.WorkspaceId}={workspace_id}") - LOGGER.warning(f"{Constant.DatasetId}={dataset_id}") - raise ConnectionError(message) - - response_dict = response.json() - LOGGER.debug("datasets = {}".format(response_dict)) - # PowerBi Always return the webURL, in-case if it is None then setting complete webURL to None instead of None/details - return PowerBiAPI.PowerBIDataset( - id=response_dict.get("id"), - name=response_dict.get("name"), - webUrl="{}/details".format(response_dict.get("webUrl")) - if response_dict.get("webUrl") is not None - else None, - workspace_id=workspace_id, - tables=[], - ) - - def get_data_sources( - self, dataset: PowerBIDataset - ) -> Optional[Dict[str, "PowerBiAPI.DataSource"]]: - """ - Fetch the data source from PowerBi for the given dataset - """ - - datasource_get_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.DATASOURCE_GET] - # Replace place holders - datasource_get_endpoint = datasource_get_endpoint.format( - POWERBI_BASE_URL=PowerBiAPI.BASE_URL, - WORKSPACE_ID=dataset.workspace_id, - DATASET_ID=dataset.id, - ) - # Hit PowerBi - LOGGER.info(f"Request to datasource URL={datasource_get_endpoint}") - response = requests.get( - datasource_get_endpoint, - headers={Constant.Authorization: self.get_access_token()}, - ) - - # Check if we got response from PowerBi - if response.status_code != 200: - message: str = "Failed to fetch datasource from power-bi for" - LOGGER.warning(message) - LOGGER.warning("{}={}".format(Constant.WorkspaceId, dataset.workspace_id)) - LOGGER.warning("{}={}".format(Constant.DatasetId, dataset.id)) - LOGGER.warning("{}={}".format(Constant.HTTP_RESPONSE_TEXT, response.text)) - LOGGER.warning( - "{}={}".format(Constant.HTTP_RESPONSE_STATUS_CODE, response.status_code) - ) - - raise ConnectionError(message) - - res = response.json() - value = res["value"] - if len(value) == 0: - LOGGER.info( - f"datasource is not found for dataset {dataset.name}({dataset.id})" - ) - - return None - - data_sources: Dict[str, "PowerBiAPI.DataSource"] = {} - LOGGER.debug("data-sources = {}".format(value)) - for datasource_dict in value: - # Create datasource instance with basic detail available - datasource = PowerBiAPI.DataSource( - id=datasource_dict.get( - "datasourceId" - ), # datasourceId is not available in all cases - type=datasource_dict["datasourceType"], - raw_connection_detail=datasource_dict["connectionDetails"], - ) - - data_sources[datasource.id] = datasource - - return data_sources - - def get_tiles(self, workspace: Workspace, dashboard: Dashboard) -> List[Tile]: - - """ - Get the list of tiles from PowerBi for the given workspace identifier - - TODO: Pagination. As per REST API doc (https://docs.microsoft.com/en-us/rest/api/power-bi/dashboards/get-tiles), there is no information available on pagination - """ - - def new_dataset_or_report(tile_instance: Any) -> dict: - """ - Find out which is the data source for tile. 
It is either REPORT or DATASET - """ - report_fields = { - "dataset": ( - workspace.datasets[tile_instance.get("datasetId")] - if tile_instance.get("datasetId") is not None - else None - ), - "report": ( - self.__get_report( - workspace_id=workspace.id, - report_id=tile_instance.get("reportId"), - ) - if tile_instance.get("reportId") is not None - else None - ), - "createdFrom": PowerBiAPI.Tile.CreatedFrom.UNKNOWN, - } - - # Tile is either created from report or dataset or from custom visualization - if report_fields["report"] is not None: - report_fields["createdFrom"] = PowerBiAPI.Tile.CreatedFrom.REPORT - elif report_fields["dataset"] is not None: - report_fields["createdFrom"] = PowerBiAPI.Tile.CreatedFrom.DATASET - else: - report_fields["createdFrom"] = PowerBiAPI.Tile.CreatedFrom.VISUALIZATION - - LOGGER.info( - f'Tile {tile_instance.get("title")}({tile_instance.get("id")}) is created from {report_fields["createdFrom"]}' - ) - - return report_fields - - tile_list_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.TILE_LIST] - # Replace place holders - tile_list_endpoint = tile_list_endpoint.format( - POWERBI_BASE_URL=PowerBiAPI.BASE_URL, - WORKSPACE_ID=dashboard.workspace_id, - DASHBOARD_ID=dashboard.id, - ) - # Hit PowerBi - LOGGER.info("Request to URL={}".format(tile_list_endpoint)) - response = requests.get( - tile_list_endpoint, - headers={Constant.Authorization: self.get_access_token()}, - ) - - # Check if we got response from PowerBi - if response.status_code != 200: - LOGGER.warning("Failed to fetch tiles list from power-bi for") - LOGGER.warning("{}={}".format(Constant.WorkspaceId, workspace.id)) - LOGGER.warning("{}={}".format(Constant.DashboardId, dashboard.id)) - raise ConnectionError("Failed to fetch the tile list from the power-bi") - - # Iterate through response and create a list of PowerBiAPI.Dashboard - tile_dict: List[Any] = response.json()[Constant.VALUE] - LOGGER.debug("Tile Dict = {}".format(tile_dict)) - tiles: List[PowerBiAPI.Tile] = [ - PowerBiAPI.Tile( - id=instance.get("id"), - title=instance.get("title"), - embedUrl=instance.get("embedUrl"), - **new_dataset_or_report(instance), - ) - for instance in tile_dict - if instance is not None - ] - - return tiles - - def get_pages_by_report( - self, workspace_id: str, report_id: str - ) -> List["PowerBiAPI.Page"]: - """ - Fetch the report from PowerBi for the given report identifier - """ - if workspace_id is None or report_id is None: - LOGGER.info("workspace_id or report_id is None") - return [] - - pages_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.PAGE_BY_REPORT] - # Replace place holders - pages_endpoint = pages_endpoint.format( - POWERBI_BASE_URL=PowerBiAPI.BASE_URL, - WORKSPACE_ID=workspace_id, - REPORT_ID=report_id, - ) - # Hit PowerBi - LOGGER.info(f"Request to pages URL={pages_endpoint}") - response = requests.get( - pages_endpoint, - headers={Constant.Authorization: self.get_access_token()}, - ) - - # Check if we got response from PowerBi - if response.status_code != 200: - message: str = "Failed to fetch reports from power-bi for" - LOGGER.warning(message) - LOGGER.warning(f"{Constant.WorkspaceId}={workspace_id}") - raise ConnectionError(message) - - response_dict = response.json() - return [ - PowerBiAPI.Page( - id="{}.{}".format(report_id, raw_instance["name"].replace(" ", "_")), - name=raw_instance["name"], - displayName=raw_instance.get("displayName"), - order=raw_instance.get("order"), - ) - for raw_instance in response_dict["value"] - ] - - def get_reports( - self, workspace: 
"PowerBiAPI.Workspace" - ) -> List["PowerBiAPI.Report"]: - """ - Fetch the report from PowerBi for the given report identifier - """ - if workspace is None: - LOGGER.info("workspace is None") - LOGGER.info(f"{Constant.WorkspaceId}={workspace.id}") - return [] - - report_list_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.REPORT_LIST] - # Replace place holders - report_list_endpoint = report_list_endpoint.format( - POWERBI_BASE_URL=PowerBiAPI.BASE_URL, - WORKSPACE_ID=workspace.id, - ) - # Hit PowerBi - LOGGER.info(f"Request to report URL={report_list_endpoint}") - response = requests.get( - report_list_endpoint, - headers={Constant.Authorization: self.get_access_token()}, - ) - - # Check if we got response from PowerBi - if response.status_code != 200: - message: str = "Failed to fetch reports from power-bi for" - LOGGER.warning(message) - LOGGER.warning(f"{Constant.WorkspaceId}={workspace.id}") - raise ConnectionError(message) - - response_dict = response.json() - reports: List["PowerBiAPI.Report"] = [ - PowerBiAPI.Report( - id=raw_instance["id"], - name=raw_instance.get("name"), - webUrl=raw_instance.get("webUrl"), - embedUrl=raw_instance.get("embedUrl"), - description=raw_instance.get("description"), - pages=self.get_pages_by_report( - workspace_id=workspace.id, report_id=raw_instance["id"] - ), - users=self.__get_users( - workspace_id=workspace.id, entity="reports", id=raw_instance["id"] - ), - dataset=workspace.datasets.get(raw_instance.get("datasetId")), - ) - for raw_instance in response_dict["value"] - ] - - return reports - - # flake8: noqa: C901 - def get_workspace( - self, workspace_id: str, reporter: PowerBiDashboardSourceReport - ) -> Workspace: - """ - Return Workspace for the given workspace identifier i.e. workspace_id - """ - scan_create_endpoint = PowerBiAPI.API_ENDPOINTS[Constant.SCAN_CREATE] - scan_create_endpoint = scan_create_endpoint.format( - POWERBI_ADMIN_BASE_URL=PowerBiAPI.ADMIN_BASE_URL - ) - - def create_scan_job(): - """ - Create scan job on PowerBi for the workspace - """ - request_body = {"workspaces": [workspace_id]} - - res = requests.post( - scan_create_endpoint, - data=request_body, - params={ - "datasetExpressions": True, - "datasetSchema": True, - "datasourceDetails": True, - "getArtifactUsers": True, - "lineage": True, - }, - headers={Constant.Authorization: self.get_access_token()}, - ) - - if res.status_code not in (200, 202): - message = f"API({scan_create_endpoint}) return error code {res.status_code} for workspace id({workspace_id})" - - LOGGER.warning(message) - - raise ConnectionError(message) - # Return Id of Scan created for the given workspace - id = res.json()["id"] - LOGGER.info("Scan id({})".format(id)) - return id - - def wait_for_scan_to_complete(scan_id: str, timeout: int) -> Boolean: - """ - Poll the PowerBi service for workspace scan to complete - """ - minimum_sleep = 3 - if timeout < minimum_sleep: - LOGGER.info( - f"Setting timeout to minimum_sleep time {minimum_sleep} seconds" - ) - timeout = minimum_sleep - - max_trial = timeout // minimum_sleep - LOGGER.info(f"Max trial {max_trial}") - scan_get_endpoint = PowerBiAPI.API_ENDPOINTS[Constant.SCAN_GET] - scan_get_endpoint = scan_get_endpoint.format( - POWERBI_ADMIN_BASE_URL=PowerBiAPI.ADMIN_BASE_URL, SCAN_ID=scan_id - ) - - LOGGER.info(f"Hitting URL={scan_get_endpoint}") - - trail = 1 - while True: - LOGGER.info(f"Trial = {trail}") - res = requests.get( - scan_get_endpoint, - headers={Constant.Authorization: self.get_access_token()}, - ) - if res.status_code != 200: - message = 
f"API({scan_get_endpoint}) return error code {res.status_code} for scan id({scan_id})" - - LOGGER.warning(message) - - raise ConnectionError(message) - - if res.json()["status"].upper() == "Succeeded".upper(): - LOGGER.info(f"Scan result is available for scan id({scan_id})") - return True - - if trail == max_trial: - break - LOGGER.info(f"Sleeping for {minimum_sleep} seconds") - sleep(minimum_sleep) - trail += 1 - - # Result is not available - return False - - def get_scan_result(scan_id: str) -> dict: - LOGGER.info("Fetching scan result") - LOGGER.info(f"{Constant.SCAN_ID}={scan_id}") - scan_result_get_endpoint = PowerBiAPI.API_ENDPOINTS[ - Constant.SCAN_RESULT_GET - ] - scan_result_get_endpoint = scan_result_get_endpoint.format( - POWERBI_ADMIN_BASE_URL=PowerBiAPI.ADMIN_BASE_URL, SCAN_ID=scan_id - ) - - LOGGER.info(f"Hitting URL={scan_result_get_endpoint}") - res = requests.get( - scan_result_get_endpoint, - headers={Constant.Authorization: self.get_access_token()}, - ) - if res.status_code != 200: - message = f"API({scan_result_get_endpoint}) return error code {res.status_code} for scan id({scan_id})" - - LOGGER.warning(message) - - raise ConnectionError(message) - - return res.json()["workspaces"][0] - - def json_to_dataset_map(scan_result: dict) -> dict: - """ - Filter out "dataset" from scan_result and return PowerBiAPI.Dataset instance set - """ - datasets: Optional[Any] = scan_result.get("datasets") - dataset_map: dict = {} - - if datasets is None or len(datasets) == 0: - LOGGER.warning( - f'Workspace {scan_result["name"]}({scan_result["id"]}) does not have datasets' - ) - - LOGGER.info("Returning empty datasets") - return dataset_map - - for dataset_dict in datasets: - dataset_instance: PowerBiAPI.PowerBIDataset = self.get_dataset( - workspace_id=scan_result["id"], - dataset_id=dataset_dict["id"], - ) - dataset_map[dataset_instance.id] = dataset_instance - # set dataset-name - dataset_name: str = ( - dataset_instance.name - if dataset_instance.name is not None - else dataset_instance.id - ) - - for table in dataset_dict["tables"]: - expression: str = ( - table["source"][0]["expression"] - if table.get("source") is not None and len(table["source"]) > 0 - else None - ) - dataset_instance.tables.append( - PowerBiAPI.PowerBIDataset.Table( - name=table["name"], - full_name="{}.{}".format( - dataset_name.replace(" ", "_"), - table["name"].replace(" ", "_"), - ), - expression=expression, - ) - ) - - return dataset_map - - def init_dashboard_tiles(workspace: PowerBiAPI.Workspace) -> None: - for dashboard in workspace.dashboards: - dashboard.tiles = self.get_tiles(workspace, dashboard=dashboard) - - return None - - LOGGER.info("Creating scan job for workspace") - LOGGER.info("{}={}".format(Constant.WorkspaceId, workspace_id)) - LOGGER.info("Hitting URL={}".format(scan_create_endpoint)) - scan_id = create_scan_job() - LOGGER.info("Waiting for scan to complete") - if ( - wait_for_scan_to_complete( - scan_id=scan_id, timeout=self.__config.scan_timeout - ) - is False - ): - raise ValueError( - "Workspace detail is not available. Please increase scan_timeout to wait." 
- ) - - # Scan is complete lets take the result - scan_result = get_scan_result(scan_id=scan_id) - LOGGER.debug(f"scan result = {scan_result}") - import json - print(json.dumps(scan_result, indent=1)) - workspace = PowerBiAPI.Workspace( - id=scan_result["id"], - name=scan_result["name"], - state=scan_result["state"], - datasets={}, - dashboards=[], - ) - # Get workspace dashboards - workspace.dashboards = self.get_dashboards(workspace) - - workspace.datasets = json_to_dataset_map(scan_result) - init_dashboard_tiles(workspace) - return workspace class Mapper: diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 0ffa9e635f43f..341906eae48d9 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -1,6 +1,7 @@ from lark import Tree from datahub.ingestion.source.powerbi import m_parser +from datahub.ingestion.source.powerbi.proxy import PowerBiAPI M_QUERIES = [ 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table', @@ -97,4 +98,9 @@ # assert m_parser.get_output_variable(parse_tree) == 'two_source_table' def test_get_upstream(): - m_parser.get_upstream_tables(M_QUERIES[0], None) + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=M_QUERIES[0], + name="table-name", + full_name="db-name.schema-name.table-name", + ) + m_parser.get_upstream_tables(table, None) From 46dcafd15949f966ec8480b52b9021778b38c819 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Thu, 8 Dec 2022 00:04:00 +0530 Subject: [PATCH 15/53] WIP --- .../ingestion/source/powerbi/m_parser.py | 58 +++++++++++++++---- .../integration/powerbi/test_m_parser.py | 3 +- 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index 04d6dd5da69f3..75cc48172e95c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -5,11 +5,11 @@ from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport from datahub.ingestion.source.powerbi.proxy import PowerBiAPI import logging -from typing import List, Optional, Any, Dict +from typing import List, Optional, Any, Dict, Union from lark import Lark, Tree, Token -logger = logging.getLogger(__name__) +LOGGER = logging.getLogger(__name__) @dataclass @@ -87,23 +87,61 @@ def parse_expression(expression: str) -> Tree: parse_tree: Tree = lark_parser.parse(expression) - logger.debug("Parse Tree") - if logger.level == logging.DEBUG: # Guard condition to avoid heavy pretty() function call - logger.debug(parse_tree.pretty()) + LOGGER.debug("Parse Tree") + if LOGGER.level == logging.DEBUG: # Guard condition to avoid heavy pretty() function call + LOGGER.debug(parse_tree.pretty()) return parse_tree +def get_resolver(parse_tree: Tree) -> Optional[AbstractMQueryResolver]: + #import pdb; pdb.set_trace() + + _filter: Any = parse_tree.find_data("invoke_expression") + + def print_leaf(node: Tree): + print(node.pretty()) + # if isinstance(node, Tree) and 
isinstance(tree.children[0], Token): + # print("It is token") + # return + # + # for child in tree.children: + # print_leaf(child) + + #print(next(next(_filter).children[0].find_data("letter_character"))) + _filter = next(_filter).children[0].find_data("letter_character") + for node in _filter: + print('======') + print(node) + print('======') + + + return None + + def get_upstream_tables(table: PowerBiAPI.Table, reporter: PowerBiDashboardSourceReport) -> List[DataPlatformTable]: parse_tree = parse_expression(table.expression) output_variable = get_output_variable(parse_tree) - filter: Any = parse_tree.find_data("invoke_expression") - tokens: List[Any] = list(filter) - print("Length = {}".format(len(tokens))) - for tree in tokens: - print(tree.pretty()) + _filter: Any = parse_tree.find_data("invoke_expression") + trees: List[Tree] = list(_filter) + if len(trees) > 1: + reporter.report_warning(table.full_name, f"{table.full_name} has more than one invoke expression") + return [] + + #print(trees[0]) + + resolver: AbstractMQueryResolver = get_resolver(parse_tree) + if resolver is None: + LOGGER.debug("Table full-name = %s", table.full_name) + LOGGER.debug("Expression = %s", table.expression) + reporter.report_warning( + table.full_name, + f"{table.full_name} M-Query resolver not found for the table expression" + ) + return [] + # filter: Any = parse_tree.find_data("variable") # def find_variable(node: Tree, variable: str) -> bool: diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 341906eae48d9..e5c2eb4b864ec 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -2,6 +2,7 @@ from datahub.ingestion.source.powerbi import m_parser from datahub.ingestion.source.powerbi.proxy import PowerBiAPI +from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport M_QUERIES = [ 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table', @@ -103,4 +104,4 @@ def test_get_upstream(): name="table-name", full_name="db-name.schema-name.table-name", ) - m_parser.get_upstream_tables(table, None) + m_parser.get_upstream_tables(table, PowerBiDashboardSourceReport()) From c5c5acecdc545ad9e5da578e36c649b672a90a60 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Fri, 9 Dec 2022 23:20:10 +0530 Subject: [PATCH 16/53] working code for postgres --- .../ingestion/source/powerbi/m_parser.py | 363 +++++++++++++----- .../ingestion/source/powerbi/powerbi.py | 10 +- .../integration/powerbi/test_m_parser.py | 69 ++-- 3 files changed, 300 insertions(+), 142 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index 75cc48172e95c..de091127660ae 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -1,11 +1,12 @@ -from abc import ABC +from abc import ABC, abstractmethod +from enum import Enum from dataclasses import dataclass import importlib.resources as pkg_resource from 
datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport from datahub.ingestion.source.powerbi.proxy import PowerBiAPI import logging -from typing import List, Optional, Any, Dict, Union +from typing import List, Optional, Any, Dict, Union, cast from lark import Lark, Tree, Token @@ -19,64 +20,280 @@ class DataPlatformTable: platform_type: str +class SupportedDataPlatform(Enum): + POSTGRES_SQL = "PostgreSQL" + ORACLE = "Oracle" + MY_SQL = "MySql" + SNOWFLAKE = "Snowflake" + + +def _get_output_variable(root: Tree) -> Optional[str]: + def get_token_list_for_any(tree: Tree, rules: List[str]) -> List[Tree]: + for rule in rules: + token_list = [x for x in tree.find_data(rule)] + if len(token_list) > 0: + return token_list + + return [] + + for tree in root.find_data("in_expression"): + for child1 in get_token_list_for_any( + tree, ["letter_character", "quoted_identifier"] + ): + return child1.children[0].value # type: ignore + + return None + + +def _get_variable_statement(parse_tree: Tree, variable: str) -> Optional[Tree]: + _filter = parse_tree.find_data("variable") + # filter will return statement of the form = + # We are searching for Tree where variable-name is matching with provided variable + for tree in _filter: + values: List[str] = _token_values(tree.children[0]) + if len(values) > 1: + # Rare chances to happen as PowerBI Grammar only have one identifier in variable-name rule + LOGGER.info("Found more than one value in variable_name rule") + return None + + if variable == values[0]: + return tree + + LOGGER.info("Provided variable(%s) not found in variable rule", variable) + + return None + + +def _get_first_rule(tree: Tree, rule: str) -> Optional[Tree]: + """ + Lark library doesn't have advance search function. + This function will return the first tree of provided rule + :param tree: Tree to search for the expression rule + :return: Tree + """ + def internal(node: Union[Tree, Token]) -> Optional[Tree]: + if isinstance(node, Tree) and node.data == rule: + return node + if isinstance(node, Token): + return None + + for child in cast(Tree, node).children: + node = internal(child) + if node is not None: + return node + + expression_tree: Optional[Tree] = internal(tree) + + return expression_tree + + +def _token_values(tree: Tree) -> List[str]: + """ + + :param tree: Tree to traverse + :return: List of leaf token data + """ + values: List[str] = [] + + def internal(node: Union[Tree, Token]): + if isinstance(node, Token): + values.append(cast(Token, node).value) + return + + for child in node.children: + internal(child) + + internal(tree) + + return values + + +def _remove_whitespaces_from_list(values: List[str]) -> List[str]: + result: List[str] = [] + for item in values: + if item.strip() not in ('', '\n', '\t'): + result.append(item) + + return result + + +def _strip_char_from_list(values: List[str], char: str) -> List[str]: + result: List[str] = [] + for item in values: + result.append(item.strip(char)) + + return result + + +def _make_function_name(tree: Tree) -> str: + values: List[str] = _token_values(tree) + return ".".join(values) + + class AbstractMQueryResolver(ABC): pass class AbstractDataAccessMQueryResolver(AbstractMQueryResolver, ABC): - pass - + table: PowerBiAPI.Table + parse_tree: Tree + reporter: PowerBiDashboardSourceReport + + def __init__(self, table: PowerBiAPI.Table, parse_tree: Tree, reporter: PowerBiDashboardSourceReport): + self.table = table + self.parse_tree = parse_tree + self.reporter = reporter + + @abstractmethod + def 
resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: + pass + + +class RelationalMQueryResolver(AbstractDataAccessMQueryResolver, ABC): + + def get_item_selector_tokens(self, variable_statement: Tree) -> (str, List[str]): + expression_tree: Tree = _get_first_rule(variable_statement, "expression") + item_selector: Tree = _get_first_rule(expression_tree, "item_selector") + identifier_tree: Tree = _get_first_rule(expression_tree, "identifier") + # remove whitespaces and quotes from token + tokens: List[str] = _strip_char_from_list(_remove_whitespaces_from_list(_token_values(item_selector)), "\"") + identifier: List[str] = _token_values(identifier_tree) + # convert tokens to dict + iterator = iter(tokens) + return identifier[0], dict(zip(iterator, iterator)) + + def get_argument_list(self, variable_statement: Tree) -> List[str]: + expression_tree: Tree = _get_first_rule(variable_statement, "expression") + argument_list: Tree = _get_first_rule(expression_tree, "argument_list") + # remove whitespaces and quotes from token + tokens: List[str] = _strip_char_from_list(_remove_whitespaces_from_list(_token_values(argument_list)), "\"") + return tokens + + def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: + data_platform_tables: List[DataPlatformTable] = [] + # Look for output variable + output_variable: str = _get_output_variable(self.parse_tree) + if output_variable is None: + self.reporter.warnings( + f"{self.table.full_name}-output-variable", + "output-variable not found in table expression", + ) + return data_platform_tables + + full_table_name: str = self.get_full_table_name(output_variable) + if full_table_name is None: + LOGGER.debug("Fail to form full_table_name for PowerBI DataSet table %s", self.table.full_name) + return data_platform_tables + + return [ + DataPlatformTable( + name=full_table_name.split(".")[-1], + full_name=full_table_name, + platform_type=self.get_platform() + ), + ] + + @abstractmethod + def get_platform(self) -> str: + pass + + @abstractmethod + def get_full_table_name(self, output_variable: str) -> str: + pass + + +class PostgresMQueryResolver(RelationalMQueryResolver): + def get_full_table_name(self, output_variable: str) -> Optional[str]: + variable_statement: Tree = _get_variable_statement(self.parse_tree, output_variable) + if variable_statement is None: + self.reporter.warnings( + f"{self.table.full_name}-variable-statement", + "output variable statement not found in table expression", + ) + return None + source, tokens = self.get_item_selector_tokens(variable_statement) + schema_name: str = tokens["Schema"] + table_name: str = tokens["Item"] + # Look for database-name + variable_statement = _get_variable_statement(self.parse_tree, source) + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-source-statement", + "source variable statement not found in table expression", + ) + return None + tokens = self.get_argument_list(variable_statement) + if len(tokens) < 1: + self.reporter.report_warning( + f"{self.table.full_name}-database-arg-list", + "Number of expected tokens in argument list are not present in table expression", + ) + return None + + database_name: str = tokens[1] # 1st token is database name + return f"{database_name}.{schema_name}.{table_name}" + + def get_platform(self) -> str: + return SupportedDataPlatform.POSTGRES_SQL.value -class PostgresMQueryResolver(AbstractDataAccessMQueryResolver): - pass class OracleMQueryResolver(AbstractDataAccessMQueryResolver): - pass + def 
resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: + return [ + DataPlatformTable( + name="postgres_table", + full_name="book.public.test", + platform_type="Oracle" + ), + ] class SnowflakeMQueryResolver(AbstractDataAccessMQueryResolver): - pass + def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: + return [ + DataPlatformTable( + name="postgres_table", + full_name="book.public.test", + platform_type="Snowflake" + ), + ] -class AbstractTableAccessMQueryResolver(AbstractDataAccessMQueryResolver, ABC): - pass - - -class TableCombineMQueryResolver(AbstractTableAccessMQueryResolver): - pass +def _get_resolver(parse_tree: Tree) -> Optional[AbstractMQueryResolver]: + _filter: Any = parse_tree.find_data("invoke_expression") -DATA_ACCESS_RESOLVER: Dict[str, AbstractMQueryResolver.__class__] = { - "PostgreSQL.Database": PostgresMQueryResolver, - "Oracle.Database": OracleMQueryResolver, - "Snowflake.Database": SnowflakeMQueryResolver, -} + letter_tree: Tree = next(_filter).children[0] + data_access_func: str = _make_function_name(letter_tree) -TABLE_ACCESS_RESOLVER: Dict[str, AbstractMQueryResolver.__class__] = { - "Table.Combine": TableCombineMQueryResolver, -} + LOGGER.debug( + "Looking for data-access(%s) resolver in data-access-function registry %s", + data_access_func, + DATA_ACCESS_RESOLVER, + ) + if DATA_ACCESS_RESOLVER.get(data_access_func) is None: + LOGGER.info("Resolver not found for %s", data_access_func) + return None -def get_output_variable(root: Tree) -> Optional[str]: - def get_token_list_for_any(tree: Tree, rules: List[str]) -> List[Tree]: - for rule in rules: - token_list = [x for x in tree.find_data(rule)] - if len(token_list) > 0: - return token_list + return DATA_ACCESS_RESOLVER[data_access_func] - return [] - for tree in root.find_data("in_expression"): - for child1 in get_token_list_for_any( - tree, ["letter_character", "quoted_identifier"] - ): - return child1.children[0].value # type: ignore +# Register M-Query resolver for specific database platform +DATA_ACCESS_RESOLVER: Dict[str, AbstractDataAccessMQueryResolver.__class__] = { + f"{SupportedDataPlatform.POSTGRES_SQL.value}.Database": PostgresMQueryResolver, + f"{SupportedDataPlatform.ORACLE.value}.Database": OracleMQueryResolver, + f"{SupportedDataPlatform.SNOWFLAKE.value}.Databases": SnowflakeMQueryResolver, +} - return None +# Register M-Query resolver for function call to resolve function arguments +TABLE_ACCESS_RESOLVER: Dict[str, AbstractMQueryResolver.__class__] = { + "Table.Combine": None, +} -def parse_expression(expression: str) -> Tree: +def _parse_expression(expression: str) -> Tree: # Read lexical grammar as text grammar: str = pkg_resource.read_text( "datahub.ingestion.source.powerbi", "powerbi-lexical-grammar.rule" @@ -94,45 +311,15 @@ def parse_expression(expression: str) -> Tree: return parse_tree -def get_resolver(parse_tree: Tree) -> Optional[AbstractMQueryResolver]: - #import pdb; pdb.set_trace() - - _filter: Any = parse_tree.find_data("invoke_expression") - - def print_leaf(node: Tree): - print(node.pretty()) - # if isinstance(node, Tree) and isinstance(tree.children[0], Token): - # print("It is token") - # return - # - # for child in tree.children: - # print_leaf(child) - - #print(next(next(_filter).children[0].find_data("letter_character"))) - _filter = next(_filter).children[0].find_data("letter_character") - for node in _filter: - print('======') - print(node) - print('======') - - - return None - - def get_upstream_tables(table: PowerBiAPI.Table, 
reporter: PowerBiDashboardSourceReport) -> List[DataPlatformTable]: - parse_tree = parse_expression(table.expression) - - output_variable = get_output_variable(parse_tree) + parse_tree = _parse_expression(table.expression) - _filter: Any = parse_tree.find_data("invoke_expression") - trees: List[Tree] = list(_filter) + trees: List[Tree] = list(parse_tree.find_data("invoke_expression")) if len(trees) > 1: reporter.report_warning(table.full_name, f"{table.full_name} has more than one invoke expression") return [] - #print(trees[0]) - - resolver: AbstractMQueryResolver = get_resolver(parse_tree) + resolver: AbstractDataAccessMQueryResolver = _get_resolver(parse_tree) if resolver is None: LOGGER.debug("Table full-name = %s", table.full_name) LOGGER.debug("Expression = %s", table.expression) @@ -142,40 +329,4 @@ def get_upstream_tables(table: PowerBiAPI.Table, reporter: PowerBiDashboardSourc ) return [] - - # filter: Any = parse_tree.find_data("variable") - # def find_variable(node: Tree, variable: str) -> bool: - # for internal_child in node.children: - # if isinstance(internal_child, Token): - # if internal_child.value == variable: - # return True - # continue - # return find_variable(internal_child, variable) - # - # return False - # - # for tree in filter: - # if find_variable(tree, output_variable): - # print("Mohd1") - # print(tree.pretty()) - # for node in tree.find_data("field_selection"): - # print("Mohd2") - # print(node) - - return [ - DataPlatformTable( - name="postgres_table", - full_name="book.public.test", - platform_type="PostgreSql" - ), - DataPlatformTable( - name="oracle_table", - full_name="book.public.test", - platform_type="Oracle" - ), - DataPlatformTable( - name="snowflake_table", - full_name="book.public.test", - platform_type="Snowflake" - ), - ] + return resolver(table, parse_tree, reporter).resolve_to_data_platform_table_list() diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index e11525c6f0cdd..7d484cf757b00 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -41,15 +41,17 @@ ) from datahub.utilities.dedup_list import deduplicate_list from datahub.ingestion.source.powerbi import m_parser -from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport, PowerBiDashboardSourceConfig, PlatformDetail +from datahub.ingestion.source.powerbi.config import ( + PowerBiDashboardSourceReport, + PowerBiDashboardSourceConfig, + PlatformDetail, + Constant +) from datahub.ingestion.source.powerbi.proxy import PowerBiAPI # Logger instance LOGGER = logging.getLogger(__name__) - - - class Mapper: """ Transfrom PowerBi concepts Dashboard, Dataset and Tile to DataHub concepts Dashboard, Dataset and Chart diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index e5c2eb4b864ec..3c08c9faea59b 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -18,90 +18,95 @@ "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select *,#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) 
as AGENT_KEY#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Industries'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true])\nin\n Source", 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,\'-\',\'\'))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = \'Software\' and (NOT(PRODUCT in (\'ADV\', \'Adv\') and left(ACCOUNT_ID,2)=\'10\') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = \'Manual Adjustment\') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN (\'Recurring\',\'0\') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Services", each if [CLASS] = "Services" then [INVOICE_AMOUNT] else 0),\n #"Added Custom" = Table.AddColumn(#"Added Conditional Column", "Advanced New Sites", each if [PRODUCT] = "ADV"\nor [PRODUCT] = "Adv"\nthen [NEW_SITE]\nelse 0)\nin\n #"Added Custom"', 'let\n Source = Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]),\n Source2 = PostgreSQL.Database(\"localhost\", \"mics\"),\n public_order_date = Source2{[Schema=\"public\",Item=\"order_date\"]}[Data],\n GSL_TEST_DB_Database = Source{[Name=\"GSL_TEST_DB\",Kind=\"Database\"]}[Data],\n PUBLIC_Schema = GSL_TEST_DB_Database{[Name=\"PUBLIC\",Kind=\"Schema\"]}[Data],\n SALES_ANALYST_VIEW_View = PUBLIC_Schema{[Name=\"SALES_ANALYST_VIEW\",Kind=\"View\"]}[Data],\n two_source_table = Table.Combine({public_order_date, SALES_ANALYST_VIEW_View})\n in\n two_source_table', + 'let\n Source = PostgreSQL.Database("localhost" , "mics" ),\n public_order_date = Source{[Schema="public",Item="order_date"]}[Data] \n in \n public_order_date', ] # def test_parse_m_query1(): # expression: str = M_QUERIES[0] -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert m_parser.get_output_variable(parse_tree) == "TESTTABLE_Table" +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == "TESTTABLE_Table" # # # def test_parse_m_query2(): # expression: str = M_QUERIES[1] -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert m_parser.get_output_variable(parse_tree) == '"Added Custom2"' +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Added Custom2"' # # # def test_parse_m_query3(): # expression: str = M_QUERIES[2] -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert m_parser.get_output_variable(parse_tree) == '"Added Conditional Column"' +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Added Conditional Column"' # # # def test_parse_m_query4(): # expression: str = M_QUERIES[3] -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert m_parser.get_output_variable(parse_tree) == '"Changed Type"' +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Changed Type"' # # # def test_parse_m_query5(): # expression: str = M_QUERIES[4] -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert m_parser.get_output_variable(parse_tree) == '"Renamed Columns"' +# parse_tree: Tree = 
m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Renamed Columns"' # # # def test_parse_m_query6(): # expression: str = M_QUERIES[5] -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert m_parser.get_output_variable(parse_tree) == '"Added Custom"' +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Added Custom"' # # # def test_parse_m_query7(): # expression: str = M_QUERIES[6] -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert m_parser.get_output_variable(parse_tree) == "Source" +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == "Source" # # # def test_parse_m_query8(): # expression: str = M_QUERIES[7] -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert m_parser.get_output_variable(parse_tree) == '"Added Custom1"' +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Added Custom1"' # # # def test_parse_m_query9(): # expression: str = M_QUERIES[8] -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert m_parser.get_output_variable(parse_tree) == '"Added Custom1"' +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Added Custom1"' # # # def test_parse_m_query10(): # expression: str = M_QUERIES[9] -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert m_parser.get_output_variable(parse_tree) == '"Changed Type1"' +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Changed Type1"' # # # def test_parse_m_query11(): # expression: str = M_QUERIES[10] -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert m_parser.get_output_variable(parse_tree) == "Source" +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == "Source" # # # def test_parse_m_query12(): # expression: str = M_QUERIES[11] -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert m_parser.get_output_variable(parse_tree) == '"Added Custom"' +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Added Custom"' # # # def test_parse_m_query13(): # expression: str = M_QUERIES[12] -# parse_tree: Tree = m_parser.parse_expression(expression) -# assert m_parser.get_output_variable(parse_tree) == 'two_source_table' +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == 'two_source_table' def test_get_upstream(): - table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=M_QUERIES[0], - name="table-name", - full_name="db-name.schema-name.table-name", - ) - m_parser.get_upstream_tables(table, PowerBiDashboardSourceReport()) + qs = [M_QUERIES[0], M_QUERIES[-1]] + for q in qs: + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=q, + name="table-name", + full_name="db-name.schema-name.table-name", + ) + reporter = PowerBiDashboardSourceReport() + print(m_parser.get_upstream_tables(table, reporter)) + From c86b23f19099135a4b80a12c944c6277ce530771 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Sat, 10 Dec 2022 22:37:30 +0530 Subject: [PATCH 17/53] WIP --- .../ingestion/source/powerbi/config.py | 23 +- .../ingestion/source/powerbi/m_parser.py | 188 +++++++++++---- 
.../ingestion/source/powerbi/powerbi.py | 72 +++--- .../integration/powerbi/test_m_parser.py | 217 +++++++++++------- 4 files changed, 326 insertions(+), 174 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index 164dfb63ed612..b4a1422a11ada 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -1,4 +1,7 @@ import pydantic + +from pydantic import validator + import datahub.emitter.mce_builder as builder from dataclasses import field as dataclass_field @@ -8,6 +11,7 @@ from datahub.configuration.source_common import EnvBasedSourceConfigBase, DEFAULT_ENV from typing import Dict, Union from datahub.ingestion.api.source import SourceReport +from datahub.ingestion.source.powerbi import m_parser class Constant: """ @@ -83,7 +87,6 @@ def report_charts_dropped(self, view: str) -> None: @dataclass class PlatformDetail: - platform: str = pydantic.Field(description="DataHub platform name. Example postgres or oracle or snowflake") platform_instance: str = pydantic.Field(default=None, description="DataHub platform instance name. It should be same as you have used in ingestion receipe of DataHub platform ingestion source") env: str = pydantic.Field( default=DEFAULT_ENV, @@ -114,11 +117,29 @@ class PowerBiAPIConfig(EnvBasedSourceConfigBase): extract_ownership: bool = pydantic.Field( default=True, description="Whether ownership should be ingested" ) + # Enable/Disable extracting lineage information of PowerBI Dataset + extract_lineage: bool = pydantic.Field( + default=True, description="Whether lineage should be ingested" + ) # Enable/Disable extracting report information extract_reports: bool = pydantic.Field( default=True, description="Whether reports should be ingested" ) + @validator("dataset_type_mapping") + @classmethod + def check_dataset_type_mapping(cls, value): + # For backward compatibility map input PostgreSql to PostgreSQL + if "PostgreSql" in value.keys(): + platform_name = value["PostgreSql"] + del value["PostgreSql"] + value["PostgreSQL"] = platform_name + + for key in value.keys(): + if key not in m_parser.POWERBI_TO_DATAHUB_DATA_PLATFORM_MAPPING.keys(): + raise ValueError(f"DataPlatform {key} is not supported") + return value + class PowerBiDashboardSourceConfig(PowerBiAPIConfig): platform_name: str = "powerbi" diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index de091127660ae..7a69aad810a01 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -1,5 +1,6 @@ from abc import ABC, abstractmethod from enum import Enum +from functools import partial from dataclasses import dataclass import importlib.resources as pkg_resource @@ -23,26 +24,22 @@ class DataPlatformTable: class SupportedDataPlatform(Enum): POSTGRES_SQL = "PostgreSQL" ORACLE = "Oracle" - MY_SQL = "MySql" SNOWFLAKE = "Snowflake" -def _get_output_variable(root: Tree) -> Optional[str]: - def get_token_list_for_any(tree: Tree, rules: List[str]) -> List[Tree]: - for rule in rules: - token_list = [x for x in tree.find_data(rule)] - if len(token_list) > 0: - return token_list - - return [] +POWERBI_TO_DATAHUB_DATA_PLATFORM_MAPPING: Dict[str, str] = { + SupportedDataPlatform.POSTGRES_SQL.value: "postgres", + 
SupportedDataPlatform.ORACLE.value: "oracle", + SupportedDataPlatform.SNOWFLAKE.value: "snowflake", +} - for tree in root.find_data("in_expression"): - for child1 in get_token_list_for_any( - tree, ["letter_character", "quoted_identifier"] - ): - return child1.children[0].value # type: ignore - return None +def _get_output_variable(root: Tree) -> Optional[str]: + in_expression_tree: Tree = _get_first_rule(root, "in_expression") + # Get list of terminal value + # Remove any whitespaces + # Remove any spaces + return "".join(_strip_char_from_list(_remove_whitespaces_from_list(_token_values(in_expression_tree)), " ")) def _get_variable_statement(parse_tree: Tree, variable: str) -> Optional[Tree]: @@ -51,12 +48,11 @@ def _get_variable_statement(parse_tree: Tree, variable: str) -> Optional[Tree]: # We are searching for Tree where variable-name is matching with provided variable for tree in _filter: values: List[str] = _token_values(tree.children[0]) - if len(values) > 1: - # Rare chances to happen as PowerBI Grammar only have one identifier in variable-name rule - LOGGER.info("Found more than one value in variable_name rule") - return None + actual_value: str = "".join(_strip_char_from_list(values, " ")) + LOGGER.info("Actual Value = %s", actual_value) + LOGGER.info("Expected Value = %s", variable) - if variable == values[0]: + if actual_value == variable: return tree LOGGER.info("Provided variable(%s) not found in variable rule", variable) @@ -143,18 +139,24 @@ def __init__(self, table: PowerBiAPI.Table, parse_tree: Tree, reporter: PowerBiD self.table = table self.parse_tree = parse_tree self.reporter = reporter + self.first_expression_func = partial(_get_first_rule, rule="expression") + self.first_item_selector_func = partial(_get_first_rule, rule="item_selector") + self.first_arg_list_func = partial(_get_first_rule, rule="argument_list") + self.first_identifier_func = partial(_get_first_rule, rule="identifier") + + @abstractmethod def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: pass -class RelationalMQueryResolver(AbstractDataAccessMQueryResolver, ABC): +class BaseMQueryResolver(AbstractDataAccessMQueryResolver, ABC): def get_item_selector_tokens(self, variable_statement: Tree) -> (str, List[str]): - expression_tree: Tree = _get_first_rule(variable_statement, "expression") - item_selector: Tree = _get_first_rule(expression_tree, "item_selector") - identifier_tree: Tree = _get_first_rule(expression_tree, "identifier") + expression_tree: Tree = self.first_expression_func(variable_statement) + item_selector: Tree = self.first_item_selector_func(expression_tree) + identifier_tree: Tree = self.first_identifier_func(expression_tree) # remove whitespaces and quotes from token tokens: List[str] = _strip_char_from_list(_remove_whitespaces_from_list(_token_values(item_selector)), "\"") identifier: List[str] = _token_values(identifier_tree) @@ -163,8 +165,8 @@ def get_item_selector_tokens(self, variable_statement: Tree) -> (str, List[str]) return identifier[0], dict(zip(iterator, iterator)) def get_argument_list(self, variable_statement: Tree) -> List[str]: - expression_tree: Tree = _get_first_rule(variable_statement, "expression") - argument_list: Tree = _get_first_rule(expression_tree, "argument_list") + expression_tree: Tree = self.first_expression_func(variable_statement) + argument_list: Tree = self.first_arg_list_func(expression_tree) # remove whitespaces and quotes from token tokens: List[str] = 
_strip_char_from_list(_remove_whitespaces_from_list(_token_values(argument_list)), "\"") return tokens @@ -202,13 +204,13 @@ def get_full_table_name(self, output_variable: str) -> str: pass -class PostgresMQueryResolver(RelationalMQueryResolver): +class PostgresMQueryResolver(BaseMQueryResolver): def get_full_table_name(self, output_variable: str) -> Optional[str]: variable_statement: Tree = _get_variable_statement(self.parse_tree, output_variable) if variable_statement is None: - self.reporter.warnings( + self.reporter.report_warning( f"{self.table.full_name}-variable-statement", - "output variable statement not found in table expression", + f"output variable ({output_variable}) statement not found in table expression", ) return None source, tokens = self.get_item_selector_tokens(variable_statement) @@ -219,14 +221,14 @@ def get_full_table_name(self, output_variable: str) -> Optional[str]: if variable_statement is None: self.reporter.report_warning( f"{self.table.full_name}-source-statement", - "source variable statement not found in table expression", + f"source variable {source} statement not found in table expression", ) return None tokens = self.get_argument_list(variable_statement) if len(tokens) < 1: self.reporter.report_warning( f"{self.table.full_name}-database-arg-list", - "Number of expected tokens in argument list are not present in table expression", + "Expected number of argument not found in data-access function of table expression", ) return None @@ -237,27 +239,115 @@ def get_platform(self) -> str: return SupportedDataPlatform.POSTGRES_SQL.value +class OracleMQueryResolver(BaseMQueryResolver): + def get_platform(self) -> str: + return SupportedDataPlatform.ORACLE.value -class OracleMQueryResolver(AbstractDataAccessMQueryResolver): - def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: - return [ - DataPlatformTable( - name="postgres_table", - full_name="book.public.test", - platform_type="Oracle" - ), - ] + def _get_db_name(self, value: str) -> Optional[str]: + error_message: str = f"The target argument ({value}) should in the format of :/[.]" + splitter_result: List[str] = value.split("/") + if len(splitter_result) != 2: + self.reporter.report_warning( + f"{self.table.full_name}-oracle-target", + error_message + ) + return None + db_name = splitter_result[1].split(".")[0] -class SnowflakeMQueryResolver(AbstractDataAccessMQueryResolver): - def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: - return [ - DataPlatformTable( - name="postgres_table", - full_name="book.public.test", - platform_type="Snowflake" - ), - ] + return db_name + + def get_full_table_name(self, output_variable: str) -> str: + # Find step for the output variable + variable_statement: Tree = _get_variable_statement(self.parse_tree, output_variable) + + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + f"output variable ({output_variable}) statement not found in table expression", + ) + return None + + schema_variable, tokens = self.get_item_selector_tokens(variable_statement) + table_name: str = tokens["Name"] + + # Find step for the schema variable + variable_statement = _get_variable_statement(self.parse_tree, schema_variable) + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-schema-variable-statement", + f"schema variable ({schema_variable}) statement not found in table expression", + ) + return None + + source_variable, tokens = 
self.get_item_selector_tokens(variable_statement) + schema_name: str = tokens["Schema"] + + # Find step for the database access variable + variable_statement = _get_variable_statement(self.parse_tree, source_variable) + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-source-variable-statement", + f"schema variable ({source_variable}) statement not found in table expression", + ) + return None + tokens = self.get_argument_list(variable_statement) + if len(tokens) < 1: + self.reporter.report_warning( + f"{self.table.full_name}-database-arg-list", + "Expected number of argument not found in data-access function of table expression", + ) + return None + # The first argument has database name. format localhost:1521/salesdb.GSLAB.COM + db_name: Optional[str] = self._get_db_name(tokens[0]) + if db_name is None: + LOGGER.debug(f"Fail to extract db name from the target {tokens[0]}") + + return f"{db_name}.{schema_name}.{table_name}" + + +class SnowflakeMQueryResolver(BaseMQueryResolver): + def get_platform(self) -> str: + return SupportedDataPlatform.SNOWFLAKE.value + + def get_full_table_name(self, output_variable: str) -> str: + # Find step for the output variable + variable_statement: Tree = _get_variable_statement(self.parse_tree, output_variable) + + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + f"output variable ({output_variable}) statement not found in table expression", + ) + return None + + schema_variable, tokens = self.get_item_selector_tokens(variable_statement) + table_name: str = tokens["Name"] + + # Find step for the schema variable + variable_statement = _get_variable_statement(self.parse_tree, schema_variable) + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-schema-variable-statement", + f"schema variable ({schema_variable}) statement not found in table expression", + ) + return None + + source_variable, tokens = self.get_item_selector_tokens(variable_statement) + schema_name: str = tokens["Name"] + + # Find step for the database access variable + variable_statement = _get_variable_statement(self.parse_tree, source_variable) + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-source-variable-statement", + f"schema variable ({source_variable}) statement not found in table expression", + ) + return None + _, tokens = self.get_item_selector_tokens(variable_statement) + db_name: str = tokens["Name"] + + return f"{db_name}.{schema_name}.{table_name}" def _get_resolver(parse_tree: Tree) -> Optional[AbstractMQueryResolver]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 7d484cf757b00..2878ffdc5680a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -148,45 +148,43 @@ def __to_datahub_dataset( aspect_name=Constant.STATUS, aspect=StatusClass(removed=False), ) + dataset_mcps.extend([info_mcp, status_mcp]) + # Check if upstreams table is available, parse them and create dataset URN for each upstream table - upstreams: List[UpstreamClass] = [] - upstream_tables: List[DataPlatformTable] = m_parser.get_upstream_tables(table.expression, self.__reporter) - for upstream_table in upstream_tables: - platform: Union[str, PlatformDetail] = self.__config.dataset_type_mapping[upstream_table.platform_type] 
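# [Editor's note] Illustrative sketch only, not part of the patch. The hunk around
# this point replaces the old PlatformDetail.platform lookup with the static
# POWERBI_TO_DATAHUB_DATA_PLATFORM_MAPPING and emits an UpstreamLineage aspect per
# dataset table. A minimal, self-contained version of that flow, assuming a single
# resolved PostgreSQL upstream; the URN, env, and table names below are example
# values, not taken from the patch.
import datahub.emitter.mce_builder as builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import (
    ChangeTypeClass,
    DatasetLineageTypeClass,
    UpstreamClass,
    UpstreamLineageClass,
)

# Assumed example values; in the source they come from the parsed M-Query,
# dataset_type_mapping, and the PowerBI dataset table being processed.
upstream_full_name = "mics.public.order_date"
datahub_platform = "postgres"  # POWERBI_TO_DATAHUB_DATA_PLATFORM_MAPPING["PostgreSQL"]
powerbi_dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:powerbi,demo.table,PROD)"

# Build the upstream dataset URN, then wrap it in an UpstreamLineage aspect and
# attach that aspect to the PowerBI dataset via a MetadataChangeProposalWrapper.
upstream_urn = builder.make_dataset_urn_with_platform_instance(
    platform=datahub_platform,
    platform_instance=None,
    env="PROD",
    name=upstream_full_name,
)
lineage_mcp = MetadataChangeProposalWrapper(
    entityType="dataset",
    changeType=ChangeTypeClass.UPSERT,
    entityUrn=powerbi_dataset_urn,
    aspect=UpstreamLineageClass(
        upstreams=[UpstreamClass(upstream_urn, DatasetLineageTypeClass.TRANSFORMED)]
    ),
)
# [Editor's note] End of sketch; the original diff continues below.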
- platform_name: str = None - platform_instance_name: str = None - platform_env: str = DEFAULT_ENV - # Determine if PlatformDetail is provided - if isinstance(platform, PlatformDetail): - platform_name = cast(PlatformDetail, platform).platform - platform_instance_name = cast(PlatformDetail, platform).platform_instance - platform_env = cast(PlatformDetail, platform).env - else: - platform_name = platform - - upstream_urn = builder.make_dataset_urn_with_platform_instance( - platform=platform_name, - platform_instance=platform_instance_name, - env=platform_env, - name=upstream_table.full_name, - ) - upstream_table = UpstreamClass( - upstream_urn, - DatasetLineageTypeClass.TRANSFORMED, - ) - upstreams.append(upstream_table) - - if len(upstreams) > 0: - upstream_lineage = UpstreamLineageClass(upstreams=upstreams) - mcp = MetadataChangeProposalWrapper( - entityType="dataset", - changeType=ChangeTypeClass.UPSERT, - entityUrn=ds_urn, - aspect=upstream_lineage, + if self.__config.extract_lineage is True: + upstreams: List[UpstreamClass] = [] + upstream_tables: List[DataPlatformTable] = m_parser.get_upstream_tables(table.expression, self.__reporter) + for upstream_table in upstream_tables: + platform: Union[str, PlatformDetail] = self.__config.dataset_type_mapping[upstream_table.platform_type] + platform_name: str = m_parser.POWERBI_TO_DATAHUB_DATA_PLATFORM_MAPPING[upstream_table.platform_type] + platform_instance_name: str = None + platform_env: str = DEFAULT_ENV + # Determine if PlatformDetail is provided + if isinstance(platform, PlatformDetail): + platform_instance_name = cast(PlatformDetail, platform).platform_instance + platform_env = cast(PlatformDetail, platform).env + + upstream_urn = builder.make_dataset_urn_with_platform_instance( + platform=platform_name, + platform_instance=platform_instance_name, + env=platform_env, + name=upstream_table.full_name, ) - dataset_mcps.extend([mcp]) - - dataset_mcps.extend([info_mcp, status_mcp]) + upstream_table = UpstreamClass( + upstream_urn, + DatasetLineageTypeClass.TRANSFORMED, + ) + upstreams.append(upstream_table) + + if len(upstreams) > 0: + upstream_lineage = UpstreamLineageClass(upstreams=upstreams) + mcp = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=ds_urn, + aspect=upstream_lineage, + ) + dataset_mcps.extend([mcp]) return dataset_mcps diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 3c08c9faea59b..9128b5912b86b 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -1,6 +1,9 @@ +from typing import List + from lark import Tree from datahub.ingestion.source.powerbi import m_parser +from datahub.ingestion.source.powerbi.m_parser import DataPlatformTable, SupportedDataPlatform from datahub.ingestion.source.powerbi.proxy import PowerBiAPI from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport @@ -19,94 +22,134 @@ 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,\'-\',\'\'))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = \'Software\' and (NOT(PRODUCT in (\'ADV\', \'Adv\') and left(ACCOUNT_ID,2)=\'10\') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = \'Manual Adjustment\') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE 
IN (\'Recurring\',\'0\') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Services", each if [CLASS] = "Services" then [INVOICE_AMOUNT] else 0),\n #"Added Custom" = Table.AddColumn(#"Added Conditional Column", "Advanced New Sites", each if [PRODUCT] = "ADV"\nor [PRODUCT] = "Adv"\nthen [NEW_SITE]\nelse 0)\nin\n #"Added Custom"', 'let\n Source = Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]),\n Source2 = PostgreSQL.Database(\"localhost\", \"mics\"),\n public_order_date = Source2{[Schema=\"public\",Item=\"order_date\"]}[Data],\n GSL_TEST_DB_Database = Source{[Name=\"GSL_TEST_DB\",Kind=\"Database\"]}[Data],\n PUBLIC_Schema = GSL_TEST_DB_Database{[Name=\"PUBLIC\",Kind=\"Schema\"]}[Data],\n SALES_ANALYST_VIEW_View = PUBLIC_Schema{[Name=\"SALES_ANALYST_VIEW\",Kind=\"View\"]}[Data],\n two_source_table = Table.Combine({public_order_date, SALES_ANALYST_VIEW_View})\n in\n two_source_table', 'let\n Source = PostgreSQL.Database("localhost" , "mics" ),\n public_order_date = Source{[Schema="public",Item="order_date"]}[Data] \n in \n public_order_date', + 'let\n Source = Oracle.Database("localhost:1521/salesdb.GSLAB.COM", [HierarchicalNavigation=true]), HR = Source{[Schema="HR"]}[Data], EMPLOYEES1 = HR{[Name="EMPLOYEES"]}[Data] \n in EMPLOYEES1' ] -# def test_parse_m_query1(): -# expression: str = M_QUERIES[0] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == "TESTTABLE_Table" -# -# -# def test_parse_m_query2(): -# expression: str = M_QUERIES[1] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Added Custom2"' -# -# -# def test_parse_m_query3(): -# expression: str = M_QUERIES[2] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Added Conditional Column"' -# -# -# def test_parse_m_query4(): -# expression: str = M_QUERIES[3] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Changed Type"' -# -# -# def test_parse_m_query5(): -# expression: str = M_QUERIES[4] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Renamed Columns"' -# -# -# def test_parse_m_query6(): -# expression: str = M_QUERIES[5] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Added Custom"' -# -# -# def test_parse_m_query7(): -# expression: str = M_QUERIES[6] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == "Source" -# -# -# def test_parse_m_query8(): -# expression: str = M_QUERIES[7] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Added Custom1"' -# -# -# def test_parse_m_query9(): -# expression: str = M_QUERIES[8] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Added Custom1"' -# -# -# def test_parse_m_query10(): -# expression: str = M_QUERIES[9] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Changed Type1"' -# -# -# def test_parse_m_query11(): -# expression: str = 
M_QUERIES[10] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == "Source" -# -# -# def test_parse_m_query12(): -# expression: str = M_QUERIES[11] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Added Custom"' -# -# -# def test_parse_m_query13(): -# expression: str = M_QUERIES[12] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == 'two_source_table' - -def test_get_upstream(): - qs = [M_QUERIES[0], M_QUERIES[-1]] - for q in qs: - table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=q, - name="table-name", - full_name="db-name.schema-name.table-name", - ) - reporter = PowerBiDashboardSourceReport() - print(m_parser.get_upstream_tables(table, reporter)) +def test_parse_m_query1(): + expression: str = M_QUERIES[0] + parse_tree: Tree = m_parser._parse_expression(expression) + assert m_parser._get_output_variable(parse_tree) == "TESTTABLE_Table" + + +def test_parse_m_query2(): + expression: str = M_QUERIES[1] + parse_tree: Tree = m_parser._parse_expression(expression) + assert m_parser._get_output_variable(parse_tree) == '"Added Custom2"' + + +def test_parse_m_query3(): + expression: str = M_QUERIES[2] + parse_tree: Tree = m_parser._parse_expression(expression) + assert m_parser._get_output_variable(parse_tree) == '"Added Conditional Column"' + + +def test_parse_m_query4(): + expression: str = M_QUERIES[3] + parse_tree: Tree = m_parser._parse_expression(expression) + assert m_parser._get_output_variable(parse_tree) == '"Changed Type"' + + +def test_parse_m_query5(): + expression: str = M_QUERIES[4] + parse_tree: Tree = m_parser._parse_expression(expression) + assert m_parser._get_output_variable(parse_tree) == '"Renamed Columns"' + + +def test_parse_m_query6(): + expression: str = M_QUERIES[5] + parse_tree: Tree = m_parser._parse_expression(expression) + assert m_parser._get_output_variable(parse_tree) == '"Added Custom"' + + +def test_parse_m_query7(): + expression: str = M_QUERIES[6] + parse_tree: Tree = m_parser._parse_expression(expression) + assert m_parser._get_output_variable(parse_tree) == "Source" + + +def test_parse_m_query8(): + expression: str = M_QUERIES[7] + parse_tree: Tree = m_parser._parse_expression(expression) + assert m_parser._get_output_variable(parse_tree) == '"Added Custom1"' + + +def test_parse_m_query9(): + expression: str = M_QUERIES[8] + parse_tree: Tree = m_parser._parse_expression(expression) + assert m_parser._get_output_variable(parse_tree) == '"Added Custom1"' + + +def test_parse_m_query10(): + expression: str = M_QUERIES[9] + parse_tree: Tree = m_parser._parse_expression(expression) + assert m_parser._get_output_variable(parse_tree) == '"Changed Type1"' + + +def test_parse_m_query11(): + expression: str = M_QUERIES[10] + parse_tree: Tree = m_parser._parse_expression(expression) + assert m_parser._get_output_variable(parse_tree) == "Source" + + +def test_parse_m_query12(): + expression: str = M_QUERIES[11] + parse_tree: Tree = m_parser._parse_expression(expression) + assert m_parser._get_output_variable(parse_tree) == '"Added Custom"' + + +def test_parse_m_query13(): + expression: str = M_QUERIES[12] + parse_tree: Tree = m_parser._parse_expression(expression) + assert m_parser._get_output_variable(parse_tree) == 'two_source_table' + + +def test_postgres_regular_case(): + q: str = M_QUERIES[13] + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=q, + 
name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables(table, reporter) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == "order_date" + assert data_platform_tables[0].full_name == "mics.public.order_date" + assert data_platform_tables[0].platform_type == SupportedDataPlatform.POSTGRES_SQL.value + + +def test_oracle_regular_case(): + q: str = M_QUERIES[14] + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=q, + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables(table, reporter) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == "EMPLOYEES" + assert data_platform_tables[0].full_name == "salesdb.HR.EMPLOYEES" + assert data_platform_tables[0].platform_type == SupportedDataPlatform.ORACLE.value + + +def test_snowflake_regular_case(): + q: str = M_QUERIES[0] + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=q, + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables(table, reporter) + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == "TESTTABLE" + assert data_platform_tables[0].full_name == "PBI_TEST.TEST.TESTTABLE" + assert data_platform_tables[0].platform_type == SupportedDataPlatform.SNOWFLAKE.value From d7c046448b9a061f6895468408c1b3da3cb2bffb Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 12 Dec 2022 13:55:36 +0530 Subject: [PATCH 18/53] lint fix --- .../ingestion/source/powerbi/config.py | 22 +- .../ingestion/source/powerbi/m_parser.py | 246 +++++++++++++----- .../ingestion/source/powerbi/powerbi.py | 69 ++--- .../integration/powerbi/test_m_parser.py | 34 ++- .../tests/integration/powerbi/test_powerbi.py | 1 + 5 files changed, 253 insertions(+), 119 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index b4a1422a11ada..db0f5858d997e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -1,18 +1,15 @@ -import pydantic +from dataclasses import dataclass, field as dataclass_field +from typing import Dict, List, Union +import pydantic from pydantic import validator import datahub.emitter.mce_builder as builder - -from dataclasses import field as dataclass_field -from typing import List - -from dataclasses import dataclass -from datahub.configuration.source_common import EnvBasedSourceConfigBase, DEFAULT_ENV -from typing import Dict, Union +from datahub.configuration.source_common import DEFAULT_ENV, EnvBasedSourceConfigBase from datahub.ingestion.api.source import SourceReport from datahub.ingestion.source.powerbi import m_parser + class Constant: """ keys used in powerbi plugin @@ -87,7 +84,10 @@ def report_charts_dropped(self, view: str) -> None: @dataclass class PlatformDetail: - platform_instance: str = pydantic.Field(default=None, description="DataHub platform instance name. 
It should be same as you have used in ingestion receipe of DataHub platform ingestion source") + platform_instance: str = pydantic.Field( + default=None, + description="DataHub platform instance name. It should be same as you have used in ingestion receipe of DataHub platform ingestion source", + ) env: str = pydantic.Field( default=DEFAULT_ENV, description="The environment that all assets produced by DataHub platform ingestion source belong to", @@ -102,7 +102,9 @@ class PowerBiAPIConfig(EnvBasedSourceConfigBase): # Dataset type mapping PowerBI support many type of data-sources. Here user need to define what type of PowerBI # DataSource need to be mapped to corresponding DataHub Platform DataSource. For example PowerBI `Snowflake` is # mapped to DataHub `snowflake` PowerBI `PostgreSQL` is mapped to DataHub `postgres` and so on. - dataset_type_mapping: Union[Dict[str, str], Dict[str, PlatformDetail]] = pydantic.Field( + dataset_type_mapping: Union[ + Dict[str, str], Dict[str, PlatformDetail] + ] = pydantic.Field( description="Mapping of PowerBI datasource type to DataHub supported data-sources. See Quickstart Recipe for mapping" ) # Azure app client identifier diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index 7a69aad810a01..0b40639f8aadf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -1,15 +1,16 @@ +import importlib.resources as pkg_resource +import logging from abc import ABC, abstractmethod +from dataclasses import dataclass from enum import Enum from functools import partial +from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast + +import lark +from lark import Lark, Token, Tree -from dataclasses import dataclass -import importlib.resources as pkg_resource from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport from datahub.ingestion.source.powerbi.proxy import PowerBiAPI -import logging -from typing import List, Optional, Any, Dict, Union, cast - -from lark import Lark, Tree, Token LOGGER = logging.getLogger(__name__) @@ -35,11 +36,17 @@ class SupportedDataPlatform(Enum): def _get_output_variable(root: Tree) -> Optional[str]: - in_expression_tree: Tree = _get_first_rule(root, "in_expression") + in_expression_tree: Optional[Tree] = _get_first_rule(root, "in_expression") + if in_expression_tree is None: + return None # Get list of terminal value # Remove any whitespaces # Remove any spaces - return "".join(_strip_char_from_list(_remove_whitespaces_from_list(_token_values(in_expression_tree)), " ")) + return "".join( + _strip_char_from_list( + _remove_whitespaces_from_list(_token_values(in_expression_tree)), " " + ) + ) def _get_variable_statement(parse_tree: Tree, variable: str) -> Optional[Tree]: @@ -49,8 +56,8 @@ def _get_variable_statement(parse_tree: Tree, variable: str) -> Optional[Tree]: for tree in _filter: values: List[str] = _token_values(tree.children[0]) actual_value: str = "".join(_strip_char_from_list(values, " ")) - LOGGER.info("Actual Value = %s", actual_value) - LOGGER.info("Expected Value = %s", variable) + LOGGER.debug("Actual Value = %s", actual_value) + LOGGER.debug("Expected Value = %s", variable) if actual_value == variable: return tree @@ -67,6 +74,7 @@ def _get_first_rule(tree: Tree, rule: str) -> Optional[Tree]: :param tree: Tree to search for the expression rule :return: Tree """ + def internal(node: 
Union[Tree, Token]) -> Optional[Tree]: if isinstance(node, Tree) and node.data == rule: return node @@ -74,9 +82,11 @@ def internal(node: Union[Tree, Token]) -> Optional[Tree]: return None for child in cast(Tree, node).children: - node = internal(child) - if node is not None: - return node + child_node: Optional[Tree] = internal(child) + if child_node is not None: + return child_node + + return None expression_tree: Optional[Tree] = internal(tree) @@ -91,7 +101,7 @@ def _token_values(tree: Tree) -> List[str]: """ values: List[str] = [] - def internal(node: Union[Tree, Token]): + def internal(node: Union[Tree, Token]) -> None: if isinstance(node, Token): values.append(cast(Token, node).value) return @@ -107,7 +117,7 @@ def internal(node: Union[Tree, Token]): def _remove_whitespaces_from_list(values: List[str]) -> List[str]: result: List[str] = [] for item in values: - if item.strip() not in ('', '\n', '\t'): + if item.strip() not in ("", "\n", "\t"): result.append(item) return result @@ -135,7 +145,12 @@ class AbstractDataAccessMQueryResolver(AbstractMQueryResolver, ABC): parse_tree: Tree reporter: PowerBiDashboardSourceReport - def __init__(self, table: PowerBiAPI.Table, parse_tree: Tree, reporter: PowerBiDashboardSourceReport): + def __init__( + self, + table: PowerBiAPI.Table, + parse_tree: Tree, + reporter: PowerBiDashboardSourceReport, + ): self.table = table self.parse_tree = parse_tree self.reporter = reporter @@ -144,54 +159,87 @@ def __init__(self, table: PowerBiAPI.Table, parse_tree: Tree, reporter: PowerBiD self.first_arg_list_func = partial(_get_first_rule, rule="argument_list") self.first_identifier_func = partial(_get_first_rule, rule="identifier") - - @abstractmethod def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: pass class BaseMQueryResolver(AbstractDataAccessMQueryResolver, ABC): + def get_item_selector_tokens( + self, variable_statement: Tree + ) -> Tuple[Optional[str], Optional[Dict[str, str]]]: + expression_tree: Optional[Tree] = self.first_expression_func(variable_statement) + if expression_tree is None: + LOGGER.debug("Expression tree not found") + LOGGER.debug(variable_statement.pretty()) + return None, None + + item_selector: Optional[Tree] = self.first_item_selector_func(expression_tree) + if item_selector is None: + LOGGER.debug("Item Selector not found in tree") + LOGGER.debug(variable_statement.pretty()) + return None, None + + identifier_tree: Optional[Tree] = self.first_identifier_func(expression_tree) + if identifier_tree is None: + LOGGER.debug("Identifier not found in tree") + LOGGER.debug(variable_statement.pretty()) + return None, None - def get_item_selector_tokens(self, variable_statement: Tree) -> (str, List[str]): - expression_tree: Tree = self.first_expression_func(variable_statement) - item_selector: Tree = self.first_item_selector_func(expression_tree) - identifier_tree: Tree = self.first_identifier_func(expression_tree) # remove whitespaces and quotes from token - tokens: List[str] = _strip_char_from_list(_remove_whitespaces_from_list(_token_values(item_selector)), "\"") - identifier: List[str] = _token_values(identifier_tree) + tokens: List[str] = _strip_char_from_list( + _remove_whitespaces_from_list(_token_values(cast(Tree, item_selector))), + '"', + ) + identifier: List[str] = _token_values( + cast(Tree, identifier_tree) + ) # type :ignore # convert tokens to dict iterator = iter(tokens) + # cast to satisfy lint return identifier[0], dict(zip(iterator, iterator)) - def get_argument_list(self, variable_statement: Tree) 
-> List[str]: - expression_tree: Tree = self.first_expression_func(variable_statement) - argument_list: Tree = self.first_arg_list_func(expression_tree) + def get_argument_list(self, variable_statement: Tree) -> Optional[List[str]]: + expression_tree: Optional[Tree] = self.first_expression_func(variable_statement) + if expression_tree is None: + LOGGER.debug("First expression rule not found in input tree") + return None + + argument_list: Optional[Tree] = self.first_arg_list_func(expression_tree) + if argument_list is None: + LOGGER.debug("First argument-list rule not found in input tree") + return None + # remove whitespaces and quotes from token - tokens: List[str] = _strip_char_from_list(_remove_whitespaces_from_list(_token_values(argument_list)), "\"") + tokens: List[str] = _strip_char_from_list( + _remove_whitespaces_from_list(_token_values(argument_list)), '"' + ) return tokens def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: data_platform_tables: List[DataPlatformTable] = [] # Look for output variable - output_variable: str = _get_output_variable(self.parse_tree) + output_variable: Optional[str] = _get_output_variable(self.parse_tree) if output_variable is None: - self.reporter.warnings( + self.reporter.report_warning( f"{self.table.full_name}-output-variable", "output-variable not found in table expression", ) return data_platform_tables - full_table_name: str = self.get_full_table_name(output_variable) + full_table_name: Optional[str] = self.get_full_table_name(output_variable) if full_table_name is None: - LOGGER.debug("Fail to form full_table_name for PowerBI DataSet table %s", self.table.full_name) + LOGGER.debug( + "Fail to form full_table_name for PowerBI DataSet table %s", + self.table.full_name, + ) return data_platform_tables return [ DataPlatformTable( name=full_table_name.split(".")[-1], full_name=full_table_name, - platform_type=self.get_platform() + platform_type=self.get_platform(), ), ] @@ -200,20 +248,29 @@ def get_platform(self) -> str: pass @abstractmethod - def get_full_table_name(self, output_variable: str) -> str: + def get_full_table_name(self, output_variable: str) -> Optional[str]: pass class PostgresMQueryResolver(BaseMQueryResolver): def get_full_table_name(self, output_variable: str) -> Optional[str]: - variable_statement: Tree = _get_variable_statement(self.parse_tree, output_variable) + variable_statement: Optional[Tree] = _get_variable_statement( + self.parse_tree, output_variable + ) if variable_statement is None: self.reporter.report_warning( f"{self.table.full_name}-variable-statement", f"output variable ({output_variable}) statement not found in table expression", ) return None - source, tokens = self.get_item_selector_tokens(variable_statement) + source, tokens = self.get_item_selector_tokens(cast(Tree, variable_statement)) + if source is None or tokens is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "Schema detail not found in table expression", + ) + return None + schema_name: str = tokens["Schema"] table_name: str = tokens["Item"] # Look for database-name @@ -224,16 +281,16 @@ def get_full_table_name(self, output_variable: str) -> Optional[str]: f"source variable {source} statement not found in table expression", ) return None - tokens = self.get_argument_list(variable_statement) - if len(tokens) < 1: + arg_list = self.get_argument_list(cast(Tree, variable_statement)) + if arg_list is None or len(arg_list) < 1: self.reporter.report_warning( 
f"{self.table.full_name}-database-arg-list", "Expected number of argument not found in data-access function of table expression", ) return None - database_name: str = tokens[1] # 1st token is database name - return f"{database_name}.{schema_name}.{table_name}" + database_name: str = cast(List[str], arg_list)[1] # 1st token is database name + return cast(Optional[str], f"{database_name}.{schema_name}.{table_name}") def get_platform(self) -> str: return SupportedDataPlatform.POSTGRES_SQL.value @@ -248,8 +305,7 @@ def _get_db_name(self, value: str) -> Optional[str]: splitter_result: List[str] = value.split("/") if len(splitter_result) != 2: self.reporter.report_warning( - f"{self.table.full_name}-oracle-target", - error_message + f"{self.table.full_name}-oracle-target", error_message ) return None @@ -257,9 +313,11 @@ def _get_db_name(self, value: str) -> Optional[str]: return db_name - def get_full_table_name(self, output_variable: str) -> str: + def get_full_table_name(self, output_variable: str) -> Optional[str]: # Find step for the output variable - variable_statement: Tree = _get_variable_statement(self.parse_tree, output_variable) + variable_statement: Optional[Tree] = _get_variable_statement( + self.parse_tree, output_variable + ) if variable_statement is None: self.reporter.report_warning( @@ -268,11 +326,22 @@ def get_full_table_name(self, output_variable: str) -> str: ) return None - schema_variable, tokens = self.get_item_selector_tokens(variable_statement) + schema_variable, tokens = self.get_item_selector_tokens( + cast(Tree, variable_statement) + ) + if schema_variable is None or tokens is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "table name not found in table expression", + ) + return None + table_name: str = tokens["Name"] # Find step for the schema variable - variable_statement = _get_variable_statement(self.parse_tree, schema_variable) + variable_statement = _get_variable_statement( + self.parse_tree, cast(str, schema_variable) + ) if variable_statement is None: self.reporter.report_warning( f"{self.table.full_name}-schema-variable-statement", @@ -281,6 +350,13 @@ def get_full_table_name(self, output_variable: str) -> str: return None source_variable, tokens = self.get_item_selector_tokens(variable_statement) + if source_variable is None or tokens is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "Schema not found in table expression", + ) + return None + schema_name: str = tokens["Schema"] # Find step for the database access variable @@ -291,17 +367,17 @@ def get_full_table_name(self, output_variable: str) -> str: f"schema variable ({source_variable}) statement not found in table expression", ) return None - tokens = self.get_argument_list(variable_statement) - if len(tokens) < 1: + arg_list = self.get_argument_list(variable_statement) + if arg_list is None or len(arg_list) < 1: self.reporter.report_warning( f"{self.table.full_name}-database-arg-list", "Expected number of argument not found in data-access function of table expression", ) return None # The first argument has database name. 
format localhost:1521/salesdb.GSLAB.COM - db_name: Optional[str] = self._get_db_name(tokens[0]) + db_name: Optional[str] = self._get_db_name(arg_list[0]) if db_name is None: - LOGGER.debug(f"Fail to extract db name from the target {tokens[0]}") + LOGGER.debug(f"Fail to extract db name from the target {arg_list}") return f"{db_name}.{schema_name}.{table_name}" @@ -310,9 +386,11 @@ class SnowflakeMQueryResolver(BaseMQueryResolver): def get_platform(self) -> str: return SupportedDataPlatform.SNOWFLAKE.value - def get_full_table_name(self, output_variable: str) -> str: + def get_full_table_name(self, output_variable: str) -> Optional[str]: # Find step for the output variable - variable_statement: Tree = _get_variable_statement(self.parse_tree, output_variable) + variable_statement: Optional[Tree] = _get_variable_statement( + self.parse_tree, output_variable + ) if variable_statement is None: self.reporter.report_warning( @@ -322,6 +400,13 @@ def get_full_table_name(self, output_variable: str) -> str: return None schema_variable, tokens = self.get_item_selector_tokens(variable_statement) + if schema_variable is None or tokens is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "table name not found in table expression", + ) + return None + table_name: str = tokens["Name"] # Find step for the schema variable @@ -334,6 +419,13 @@ def get_full_table_name(self, output_variable: str) -> str: return None source_variable, tokens = self.get_item_selector_tokens(variable_statement) + if source_variable is None or tokens is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "schema name not found in table expression", + ) + return None + schema_name: str = tokens["Name"] # Find step for the database access variable @@ -345,12 +437,19 @@ def get_full_table_name(self, output_variable: str) -> str: ) return None _, tokens = self.get_item_selector_tokens(variable_statement) + if tokens is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "database name not found in table expression", + ) + return None + db_name: str = tokens["Name"] return f"{db_name}.{schema_name}.{table_name}" -def _get_resolver(parse_tree: Tree) -> Optional[AbstractMQueryResolver]: +def _get_resolver(parse_tree: Tree) -> Optional[Type["BaseMQueryResolver"]]: _filter: Any = parse_tree.find_data("invoke_expression") @@ -371,16 +470,11 @@ def _get_resolver(parse_tree: Tree) -> Optional[AbstractMQueryResolver]: # Register M-Query resolver for specific database platform -DATA_ACCESS_RESOLVER: Dict[str, AbstractDataAccessMQueryResolver.__class__] = { +DATA_ACCESS_RESOLVER = { f"{SupportedDataPlatform.POSTGRES_SQL.value}.Database": PostgresMQueryResolver, f"{SupportedDataPlatform.ORACLE.value}.Database": OracleMQueryResolver, f"{SupportedDataPlatform.SNOWFLAKE.value}.Databases": SnowflakeMQueryResolver, -} - -# Register M-Query resolver for function call to resolve function arguments -TABLE_ACCESS_RESOLVER: Dict[str, AbstractMQueryResolver.__class__] = { - "Table.Combine": None, -} +} # type :ignore def _parse_expression(expression: str) -> Tree: @@ -395,28 +489,46 @@ def _parse_expression(expression: str) -> Tree: parse_tree: Tree = lark_parser.parse(expression) LOGGER.debug("Parse Tree") - if LOGGER.level == logging.DEBUG: # Guard condition to avoid heavy pretty() function call + if ( + LOGGER.level == logging.DEBUG + ): # Guard condition to avoid heavy pretty() function call LOGGER.debug(parse_tree.pretty()) return parse_tree -def 
get_upstream_tables(table: PowerBiAPI.Table, reporter: PowerBiDashboardSourceReport) -> List[DataPlatformTable]: - parse_tree = _parse_expression(table.expression) +def get_upstream_tables( + table: PowerBiAPI.Table, reporter: PowerBiDashboardSourceReport +) -> List[DataPlatformTable]: + if table.expression is None: + reporter.report_warning(table.full_name, "Expression is none") + return [] + + try: + parse_tree: Tree = _parse_expression(table.expression) + except lark.exceptions.UnexpectedCharacters: + reporter.report_warning( + table.full_name, f"UnSupported expression = {table.expression}" + ) + return [] trees: List[Tree] = list(parse_tree.find_data("invoke_expression")) if len(trees) > 1: - reporter.report_warning(table.full_name, f"{table.full_name} has more than one invoke expression") + reporter.report_warning( + table.full_name, f"{table.full_name} has more than one invoke expression" + ) return [] - resolver: AbstractDataAccessMQueryResolver = _get_resolver(parse_tree) + resolver: Optional[Type[BaseMQueryResolver]] = _get_resolver(parse_tree) if resolver is None: LOGGER.debug("Table full-name = %s", table.full_name) LOGGER.debug("Expression = %s", table.expression) reporter.report_warning( table.full_name, - f"{table.full_name} M-Query resolver not found for the table expression" + f"{table.full_name} M-Query resolver not found for the table expression", ) return [] - return resolver(table, parse_tree, reporter).resolve_to_data_platform_table_list() + return resolver( + table, parse_tree, reporter + ).resolve_to_data_platform_table_list() # type: ignore diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 2878ffdc5680a..f172cc37cc721 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -5,7 +5,7 @@ ######################################################### import logging -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast +from typing import Iterable, List, Optional, Tuple, Union, cast import datahub.emitter.mce_builder as builder from datahub.configuration.source_common import DEFAULT_ENV @@ -21,7 +21,15 @@ ) from datahub.ingestion.api.source import Source, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.powerbi import m_parser +from datahub.ingestion.source.powerbi.config import ( + Constant, + PlatformDetail, + PowerBiDashboardSourceConfig, + PowerBiDashboardSourceReport, +) from datahub.ingestion.source.powerbi.m_parser import DataPlatformTable +from datahub.ingestion.source.powerbi.proxy import PowerBiAPI from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps from datahub.metadata.schema_classes import ( BrowsePathsClass, @@ -32,22 +40,18 @@ CorpUserKeyClass, DashboardInfoClass, DashboardKeyClass, + DatasetLineageTypeClass, DatasetPropertiesClass, OwnerClass, OwnershipClass, OwnershipTypeClass, StatusClass, - SubTypesClass, UpstreamClass, DatasetLineageTypeClass, UpstreamLineageClass, + SubTypesClass, + UpstreamClass, + UpstreamLineageClass, ) from datahub.utilities.dedup_list import deduplicate_list -from datahub.ingestion.source.powerbi import m_parser -from datahub.ingestion.source.powerbi.config import ( - PowerBiDashboardSourceReport, - PowerBiDashboardSourceConfig, - PlatformDetail, - Constant -) -from datahub.ingestion.source.powerbi.proxy import PowerBiAPI + # 
Logger instance LOGGER = logging.getLogger(__name__) @@ -69,7 +73,11 @@ def __eq__(self, instance): def __hash__(self): return id(self.id) - def __init__(self, config: PowerBiDashboardSourceConfig, reporter: PowerBiDashboardSourceReport): + def __init__( + self, + config: PowerBiDashboardSourceConfig, + reporter: PowerBiDashboardSourceReport, + ): self.__config = config self.__reporter = reporter @@ -150,18 +158,28 @@ def __to_datahub_dataset( ) dataset_mcps.extend([info_mcp, status_mcp]) - # Check if upstreams table is available, parse them and create dataset URN for each upstream table if self.__config.extract_lineage is True: + # Check if upstreams table is available, parse them and create dataset URN for each upstream table upstreams: List[UpstreamClass] = [] - upstream_tables: List[DataPlatformTable] = m_parser.get_upstream_tables(table.expression, self.__reporter) + upstream_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( + table, self.__reporter + ) for upstream_table in upstream_tables: - platform: Union[str, PlatformDetail] = self.__config.dataset_type_mapping[upstream_table.platform_type] - platform_name: str = m_parser.POWERBI_TO_DATAHUB_DATA_PLATFORM_MAPPING[upstream_table.platform_type] - platform_instance_name: str = None + platform: Union[ + str, PlatformDetail + ] = self.__config.dataset_type_mapping[upstream_table.platform_type] + platform_name: str = ( + m_parser.POWERBI_TO_DATAHUB_DATA_PLATFORM_MAPPING[ + upstream_table.platform_type + ] + ) + platform_instance_name: Optional[str] = None platform_env: str = DEFAULT_ENV # Determine if PlatformDetail is provided if isinstance(platform, PlatformDetail): - platform_instance_name = cast(PlatformDetail, platform).platform_instance + platform_instance_name = cast( + PlatformDetail, platform + ).platform_instance platform_env = cast(PlatformDetail, platform).env upstream_urn = builder.make_dataset_urn_with_platform_instance( @@ -170,11 +188,11 @@ def __to_datahub_dataset( env=platform_env, name=upstream_table.full_name, ) - upstream_table = UpstreamClass( + upstream_table_class = UpstreamClass( upstream_urn, DatasetLineageTypeClass.TRANSFORMED, ) - upstreams.append(upstream_table) + upstreams.append(upstream_table_class) if len(upstreams) > 0: upstream_lineage = UpstreamLineageClass(upstreams=upstreams) @@ -538,19 +556,6 @@ def to_chart_mcps( aspect=StatusClass(removed=False), ) - # ChartKey status - chart_key_instance = ChartKeyClass( - dashboardTool=self.__config.platform_name, - chartId=Constant.CHART_ID.format(page.id), - ) - - chartkey_mcp = self.new_mcp( - entity_type=Constant.CHART, - entity_urn=chart_urn, - aspect_name=Constant.CHART_KEY, - aspect=chart_key_instance, - ) - return [info_mcp, status_mcp] for page in pages: diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 9128b5912b86b..3b5f9dbd515c3 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -3,9 +3,12 @@ from lark import Tree from datahub.ingestion.source.powerbi import m_parser -from datahub.ingestion.source.powerbi.m_parser import DataPlatformTable, SupportedDataPlatform -from datahub.ingestion.source.powerbi.proxy import PowerBiAPI from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport +from datahub.ingestion.source.powerbi.m_parser import ( + DataPlatformTable, + SupportedDataPlatform, +) +from 
datahub.ingestion.source.powerbi.proxy import PowerBiAPI M_QUERIES = [ 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table', @@ -20,9 +23,9 @@ 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","OPERATIONS_ANALYTICS_WAREHOUSE_PROD",[Role="OPERATIONS_ANALYTICS_MEMBER_AD"]),\n OPERATIONS_ANALYTICS_Database = Source{[Name="OPERATIONS_ANALYTICS",Kind="Database"]}[Data],\n TEST_Schema = OPERATIONS_ANALYTICS_Database{[Name="TEST",Kind="Schema"]}[Data],\n LZ_MIGRATION_DOWNLOAD_View = TEST_Schema{[Name="LZ_MIGRATION_DOWNLOAD",Kind="View"]}[Data],\n #"Changed Type" = Table.TransformColumnTypes(LZ_MIGRATION_DOWNLOAD_View,{{"MIGRATION_MONTH_ID", type text}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Migration Month", each Date.FromText(\nText.Range([MIGRATION_MONTH_ID], 0,4) & "-" & \nText.Range([MIGRATION_MONTH_ID], 4,2) \n)),\n #"Changed Type1" = Table.TransformColumnTypes(#"Added Custom",{{"Migration Month", type date}})\nin\n #"Changed Type1"', "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select *,#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Industries'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true])\nin\n Source", 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,\'-\',\'\'))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = \'Software\' and (NOT(PRODUCT in (\'ADV\', \'Adv\') and left(ACCOUNT_ID,2)=\'10\') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = \'Manual Adjustment\') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN (\'Recurring\',\'0\') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Services", each if [CLASS] = "Services" then [INVOICE_AMOUNT] else 0),\n #"Added Custom" = Table.AddColumn(#"Added Conditional Column", "Advanced New Sites", each if [PRODUCT] = "ADV"\nor [PRODUCT] = "Adv"\nthen [NEW_SITE]\nelse 0)\nin\n #"Added Custom"', - 'let\n Source = Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]),\n Source2 = PostgreSQL.Database(\"localhost\", \"mics\"),\n public_order_date = Source2{[Schema=\"public\",Item=\"order_date\"]}[Data],\n GSL_TEST_DB_Database = Source{[Name=\"GSL_TEST_DB\",Kind=\"Database\"]}[Data],\n PUBLIC_Schema = GSL_TEST_DB_Database{[Name=\"PUBLIC\",Kind=\"Schema\"]}[Data],\n SALES_ANALYST_VIEW_View = PUBLIC_Schema{[Name=\"SALES_ANALYST_VIEW\",Kind=\"View\"]}[Data],\n two_source_table = Table.Combine({public_order_date, SALES_ANALYST_VIEW_View})\n in\n two_source_table', + 'let\n Source = 
Snowflake.Databases("xaa48144.snowflakecomputing.com","GSL_TEST_WH",[Role="ACCOUNTADMIN"]),\n Source2 = PostgreSQL.Database("localhost", "mics"),\n public_order_date = Source2{[Schema="public",Item="order_date"]}[Data],\n GSL_TEST_DB_Database = Source{[Name="GSL_TEST_DB",Kind="Database"]}[Data],\n PUBLIC_Schema = GSL_TEST_DB_Database{[Name="PUBLIC",Kind="Schema"]}[Data],\n SALES_ANALYST_VIEW_View = PUBLIC_Schema{[Name="SALES_ANALYST_VIEW",Kind="View"]}[Data],\n two_source_table = Table.Combine({public_order_date, SALES_ANALYST_VIEW_View})\n in\n two_source_table', 'let\n Source = PostgreSQL.Database("localhost" , "mics" ),\n public_order_date = Source{[Schema="public",Item="order_date"]}[Data] \n in \n public_order_date', - 'let\n Source = Oracle.Database("localhost:1521/salesdb.GSLAB.COM", [HierarchicalNavigation=true]), HR = Source{[Schema="HR"]}[Data], EMPLOYEES1 = HR{[Name="EMPLOYEES"]}[Data] \n in EMPLOYEES1' + 'let\n Source = Oracle.Database("localhost:1521/salesdb.GSLAB.COM", [HierarchicalNavigation=true]), HR = Source{[Schema="HR"]}[Data], EMPLOYEES1 = HR{[Name="EMPLOYEES"]}[Data] \n in EMPLOYEES1', ] @@ -101,7 +104,7 @@ def test_parse_m_query12(): def test_parse_m_query13(): expression: str = M_QUERIES[12] parse_tree: Tree = m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == 'two_source_table' + assert m_parser._get_output_variable(parse_tree) == "two_source_table" def test_postgres_regular_case(): @@ -113,12 +116,17 @@ def test_postgres_regular_case(): ) reporter = PowerBiDashboardSourceReport() - data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables(table, reporter) + data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( + table, reporter + ) assert len(data_platform_tables) == 1 assert data_platform_tables[0].name == "order_date" assert data_platform_tables[0].full_name == "mics.public.order_date" - assert data_platform_tables[0].platform_type == SupportedDataPlatform.POSTGRES_SQL.value + assert ( + data_platform_tables[0].platform_type + == SupportedDataPlatform.POSTGRES_SQL.value + ) def test_oracle_regular_case(): @@ -130,7 +138,9 @@ def test_oracle_regular_case(): ) reporter = PowerBiDashboardSourceReport() - data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables(table, reporter) + data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( + table, reporter + ) assert len(data_platform_tables) == 1 assert data_platform_tables[0].name == "EMPLOYEES" @@ -147,9 +157,13 @@ def test_snowflake_regular_case(): ) reporter = PowerBiDashboardSourceReport() - data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables(table, reporter) + data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( + table, reporter + ) assert len(data_platform_tables) == 1 assert data_platform_tables[0].name == "TESTTABLE" assert data_platform_tables[0].full_name == "PBI_TEST.TEST.TESTTABLE" - assert data_platform_tables[0].platform_type == SupportedDataPlatform.SNOWFLAKE.value + assert ( + data_platform_tables[0].platform_type == SupportedDataPlatform.SNOWFLAKE.value + ) diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 40d441b9cbc91..acaa2fb77307c 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -220,6 +220,7 @@ def default_source_config(): 
"client_secret": "bar", "tenant_id": "0B0C960B-FCDF-4D0F-8C45-2E03BB59DDEB", "workspace_id": "64ED5CAD-7C10-4684-8180-826122881108", + "extract_lineage": False, "dataset_type_mapping": { "PostgreSql": "postgres", "Oracle": "oracle", From 75d5b6bf477b2cc5d371add21d355281b9be2623 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 12 Dec 2022 14:02:40 +0530 Subject: [PATCH 19/53] PowerBI API --- .../datahub/ingestion/source/powerbi/proxy.py | 885 ++++++++++++++++++ 1 file changed, 885 insertions(+) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py new file mode 100644 index 0000000000000..1b644a4fb4265 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py @@ -0,0 +1,885 @@ +import json +import logging +from dataclasses import dataclass +from enum import Enum +from time import sleep +from typing import Any, Dict, List, Optional + +import msal +import requests as requests + +from datahub.configuration.common import ConfigurationError +from datahub.ingestion.source.powerbi.config import ( + Constant, + PowerBiAPIConfig, + PowerBiDashboardSourceReport, +) + +# Logger instance +LOGGER = logging.getLogger(__name__) + + +class PowerBiAPI: + # API endpoints of PowerBi to fetch dashboards, tiles, datasets + API_ENDPOINTS = { + Constant.DASHBOARD_LIST: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/dashboards", + Constant.ENTITY_USER_LIST: "{POWERBI_ADMIN_BASE_URL}/{ENTITY}/{ENTITY_ID}/users", + Constant.TILE_LIST: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/dashboards/{DASHBOARD_ID}/tiles", + Constant.DATASET_GET: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/datasets/{DATASET_ID}", + Constant.DATASOURCE_GET: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/datasets/{DATASET_ID}/datasources", + Constant.REPORT_GET: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/reports/{REPORT_ID}", + Constant.REPORT_LIST: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/reports", + Constant.SCAN_GET: "{POWERBI_ADMIN_BASE_URL}/workspaces/scanStatus/{SCAN_ID}", + Constant.SCAN_RESULT_GET: "{POWERBI_ADMIN_BASE_URL}/workspaces/scanResult/{SCAN_ID}", + Constant.SCAN_CREATE: "{POWERBI_ADMIN_BASE_URL}/workspaces/getInfo", + Constant.PAGE_BY_REPORT: "{POWERBI_BASE_URL}/{WORKSPACE_ID}/reports/{REPORT_ID}/pages", + } + + SCOPE: str = "https://analysis.windows.net/powerbi/api/.default" + BASE_URL: str = "https://api.powerbi.com/v1.0/myorg/groups" + ADMIN_BASE_URL: str = "https://api.powerbi.com/v1.0/myorg/admin" + AUTHORITY: str = "https://login.microsoftonline.com/" + + @dataclass + class Workspace: + """ + PowerBi Workspace + """ + + id: str + name: str + state: str + dashboards: List[Any] + datasets: Dict[str, "PowerBiAPI.PowerBIDataset"] + + @dataclass + class DataSource: + """ + PowerBi + """ + + id: str + type: str + raw_connection_detail: Dict + + def __members(self): + return (self.id,) + + def __eq__(self, instance): + return ( + isinstance(instance, PowerBiAPI.DataSource) + and self.__members() == instance.__members() + ) + + def __hash__(self): + return hash(self.__members()) + + @dataclass + class Table: + name: str + full_name: str + expression: Optional[str] + + # dataclasses for PowerBi Dashboard + @dataclass + class PowerBIDataset: + id: str + name: str + webUrl: Optional[str] + workspace_id: str + # Table in datasets + tables: List["PowerBiAPI.Table"] + + def get_urn_part(self): + return f"datasets.{self.id}" + + def __members(self): + return (self.id,) + + def 
__eq__(self, instance): + return ( + isinstance(instance, PowerBiAPI.PowerBIDataset) + and self.__members() == instance.__members() + ) + + def __hash__(self): + return hash(self.__members()) + + @dataclass + class Page: + id: str + displayName: str + name: str + order: int + + def get_urn_part(self): + return f"pages.{self.id}" + + @dataclass + class User: + id: str + displayName: str + emailAddress: str + graphId: str + principalType: str + + def get_urn_part(self): + return f"users.{self.id}" + + def __members(self): + return (self.id,) + + def __eq__(self, instance): + return ( + isinstance(instance, PowerBiAPI.User) + and self.__members() == instance.__members() + ) + + def __hash__(self): + return hash(self.__members()) + + @dataclass + class Report: + id: str + name: str + webUrl: str + embedUrl: str + description: str + dataset: Optional["PowerBiAPI.PowerBIDataset"] + pages: List["PowerBiAPI.Page"] + users: List["PowerBiAPI.User"] + + def get_urn_part(self): + return f"reports.{self.id}" + + @dataclass + class Tile: + class CreatedFrom(Enum): + REPORT = "Report" + DATASET = "Dataset" + VISUALIZATION = "Visualization" + UNKNOWN = "UNKNOWN" + + id: str + title: str + embedUrl: str + dataset: Optional["PowerBiAPI.PowerBIDataset"] + report: Optional[Any] + createdFrom: CreatedFrom + + def get_urn_part(self): + return f"charts.{self.id}" + + @dataclass + class Dashboard: + id: str + displayName: str + embedUrl: str + webUrl: str + isReadOnly: Any + workspace_id: str + workspace_name: str + tiles: List["PowerBiAPI.Tile"] + users: List["PowerBiAPI.User"] + + def get_urn_part(self): + return f"dashboards.{self.id}" + + def __members(self): + return (self.id,) + + def __eq__(self, instance): + return ( + isinstance(instance, PowerBiAPI.Dashboard) + and self.__members() == instance.__members() + ) + + def __hash__(self): + return hash(self.__members()) + + def __init__(self, config: PowerBiAPIConfig) -> None: + self.__config: PowerBiAPIConfig = config + self.__access_token: str = "" + # Power-Bi Auth (Service Principal Auth) + self.__msal_client = msal.ConfidentialClientApplication( + self.__config.client_id, + client_credential=self.__config.client_secret, + authority=PowerBiAPI.AUTHORITY + self.__config.tenant_id, + ) + + # Test connection by generating a access token + LOGGER.info("Trying to connect to {}".format(self.__get_authority_url())) + self.get_access_token() + LOGGER.info("Able to connect to {}".format(self.__get_authority_url())) + + def __get_authority_url(self): + return "{}{}".format(PowerBiAPI.AUTHORITY, self.__config.tenant_id) + + def __get_users(self, workspace_id: str, entity: str, _id: str) -> List[User]: + """ + Get user for the given PowerBi entity + """ + users: List[PowerBiAPI.User] = [] + if self.__config.extract_ownership is False: + LOGGER.info( + "ExtractOwnership capabilities is disabled from configuration and hence returning empty users list" + ) + return users + + user_list_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.ENTITY_USER_LIST] + # Replace place holders + user_list_endpoint = user_list_endpoint.format( + POWERBI_ADMIN_BASE_URL=PowerBiAPI.ADMIN_BASE_URL, + ENTITY=entity, + ENTITY_ID=_id, + ) + # Hit PowerBi + LOGGER.info(f"Request to URL={user_list_endpoint}") + response = requests.get( + user_list_endpoint, + headers={Constant.Authorization: self.get_access_token()}, + ) + + # Check if we got response from PowerBi + if response.status_code != 200: + LOGGER.warning( + "Failed to fetch user list from power-bi. http_status=%s. 
message=%s", + response.status_code, + response.text, + ) + + LOGGER.info(f"{Constant.WorkspaceId}={workspace_id}") + LOGGER.info(f"{Constant.ENTITY}={entity}") + LOGGER.info(f"{Constant.ID}={_id}") + raise ConnectionError("Failed to fetch the user list from the power-bi") + + users_dict: List[Any] = response.json()[Constant.VALUE] + + # Iterate through response and create a list of PowerBiAPI.Dashboard + users = [ + PowerBiAPI.User( + id=instance.get("identifier"), + displayName=instance.get("displayName"), + emailAddress=instance.get("emailAddress"), + graphId=instance.get("graphId"), + principalType=instance.get("principalType"), + ) + for instance in users_dict + ] + + return users + + def __get_report( + self, workspace_id: str, report_id: str + ) -> Optional["PowerBiAPI.Report"]: + """ + Fetch the report from PowerBi for the given report identifier + """ + if workspace_id is None or report_id is None: + LOGGER.info("Input values are None") + LOGGER.info(f"{Constant.WorkspaceId}={workspace_id}") + LOGGER.info(f"{Constant.ReportId}={report_id}") + return None + + report_get_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.REPORT_GET] + # Replace place holders + report_get_endpoint = report_get_endpoint.format( + POWERBI_BASE_URL=PowerBiAPI.BASE_URL, + WORKSPACE_ID=workspace_id, + REPORT_ID=report_id, + ) + # Hit PowerBi + LOGGER.info(f"Request to report URL={report_get_endpoint}") + response = requests.get( + report_get_endpoint, + headers={Constant.Authorization: self.get_access_token()}, + ) + + # Check if we got response from PowerBi + if response.status_code != 200: + message: str = "Failed to fetch report from power-bi for" + LOGGER.warning(message) + LOGGER.warning(f"{Constant.WorkspaceId}={workspace_id}") + LOGGER.warning(f"{Constant.ReportId}={report_id}") + raise ConnectionError(message) + + response_dict = response.json() + + return PowerBiAPI.Report( + id=response_dict.get("id"), + name=response_dict.get("name"), + webUrl=response_dict.get("webUrl"), + embedUrl=response_dict.get("embedUrl"), + description=response_dict.get("description"), + users=[], + pages=[], + dataset=self.get_dataset( + workspace_id=workspace_id, dataset_id=response_dict.get("datasetId") + ), + ) + + def get_access_token(self): + if self.__access_token != "": + LOGGER.info("Returning the cached access token") + return self.__access_token + + LOGGER.info("Generating PowerBi access token") + + auth_response = self.__msal_client.acquire_token_for_client( + scopes=[PowerBiAPI.SCOPE] + ) + + if not auth_response.get("access_token"): + LOGGER.warning( + "Failed to generate the PowerBi access token. Please check input configuration" + ) + raise ConfigurationError( + "Powerbi authorization failed . Please check your input configuration." + ) + + LOGGER.info("Generated PowerBi access token") + + self.__access_token = "Bearer {}".format(auth_response.get("access_token")) + + LOGGER.debug(f"{Constant.PBIAccessToken}={self.__access_token}") + + return self.__access_token + + def get_dashboard_users(self, dashboard: Dashboard) -> List[User]: + """ + Return list of dashboard users + """ + return self.__get_users( + workspace_id=dashboard.workspace_id, entity="dashboards", _id=dashboard.id + ) + + def get_dashboards(self, workspace: Workspace) -> List[Dashboard]: + """ + Get the list of dashboard from PowerBi for the given workspace identifier + + TODO: Pagination. 
As per REST API doc (https://docs.microsoft.com/en-us/rest/api/power-bi/dashboards/get + -dashboards), there is no information available on pagination + """ + dashboard_list_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.DASHBOARD_LIST] + # Replace place holders + dashboard_list_endpoint = dashboard_list_endpoint.format( + POWERBI_BASE_URL=PowerBiAPI.BASE_URL, WORKSPACE_ID=workspace.id + ) + # Hit PowerBi + LOGGER.info(f"Request to URL={dashboard_list_endpoint}") + response = requests.get( + dashboard_list_endpoint, + headers={Constant.Authorization: self.get_access_token()}, + ) + + # Check if we got response from PowerBi + if response.status_code != 200: + LOGGER.warning("Failed to fetch dashboard list from power-bi for") + LOGGER.warning(f"{Constant.WorkspaceId}={workspace.id}") + raise ConnectionError( + "Failed to fetch the dashboard list from the power-bi" + ) + + dashboards_dict: List[Any] = response.json()[Constant.VALUE] + + # Iterate through response and create a list of PowerBiAPI.Dashboard + dashboards: List[PowerBiAPI.Dashboard] = [ + PowerBiAPI.Dashboard( + id=instance.get("id"), + isReadOnly=instance.get("isReadOnly"), + displayName=instance.get("displayName"), + embedUrl=instance.get("embedUrl"), + webUrl=instance.get("webUrl"), + workspace_id=workspace.id, + workspace_name=workspace.name, + tiles=[], + users=[], + ) + for instance in dashboards_dict + if instance is not None + ] + + return dashboards + + def get_dataset(self, workspace_id: str, dataset_id: str) -> Any: + """ + Fetch the dataset from PowerBi for the given dataset identifier + """ + if workspace_id is None or dataset_id is None: + LOGGER.info("Input values are None") + LOGGER.info(f"{Constant.WorkspaceId}={workspace_id}") + LOGGER.info(f"{Constant.DatasetId}={dataset_id}") + return None + + dataset_get_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.DATASET_GET] + # Replace place holders + dataset_get_endpoint = dataset_get_endpoint.format( + POWERBI_BASE_URL=PowerBiAPI.BASE_URL, + WORKSPACE_ID=workspace_id, + DATASET_ID=dataset_id, + ) + # Hit PowerBi + LOGGER.info(f"Request to dataset URL={dataset_get_endpoint}") + response = requests.get( + dataset_get_endpoint, + headers={Constant.Authorization: self.get_access_token()}, + ) + + # Check if we got response from PowerBi + if response.status_code != 200: + message: str = "Failed to fetch dataset from power-bi for" + LOGGER.warning(message) + LOGGER.warning(f"{Constant.WorkspaceId}={workspace_id}") + LOGGER.warning(f"{Constant.DatasetId}={dataset_id}") + raise ConnectionError(message) + + response_dict = response.json() + LOGGER.debug("datasets = {}".format(response_dict)) + # PowerBi Always return the webURL, in-case if it is None then setting complete webURL to None instead of + # None/details + return PowerBiAPI.PowerBIDataset( + id=response_dict.get("id"), + name=response_dict.get("name"), + webUrl="{}/details".format(response_dict.get("webUrl")) + if response_dict.get("webUrl") is not None + else None, + workspace_id=workspace_id, + tables=[], + ) + + def get_data_sources( + self, dataset: PowerBIDataset + ) -> Optional[Dict[str, "PowerBiAPI.DataSource"]]: + """ + Fetch the data source from PowerBi for the given dataset + """ + + datasource_get_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.DATASOURCE_GET] + # Replace place holders + datasource_get_endpoint = datasource_get_endpoint.format( + POWERBI_BASE_URL=PowerBiAPI.BASE_URL, + WORKSPACE_ID=dataset.workspace_id, + DATASET_ID=dataset.id, + ) + # Hit PowerBi + LOGGER.info(f"Request to 
datasource URL={datasource_get_endpoint}") + response = requests.get( + datasource_get_endpoint, + headers={Constant.Authorization: self.get_access_token()}, + ) + + # Check if we got response from PowerBi + if response.status_code != 200: + message: str = "Failed to fetch datasource from power-bi for" + LOGGER.warning(message) + LOGGER.warning("{}={}".format(Constant.WorkspaceId, dataset.workspace_id)) + LOGGER.warning("{}={}".format(Constant.DatasetId, dataset.id)) + LOGGER.warning("{}={}".format(Constant.HTTP_RESPONSE_TEXT, response.text)) + LOGGER.warning( + "{}={}".format(Constant.HTTP_RESPONSE_STATUS_CODE, response.status_code) + ) + + raise ConnectionError(message) + + res = response.json() + value = res["value"] + if len(value) == 0: + LOGGER.info( + f"datasource is not found for dataset {dataset.name}({dataset.id})" + ) + + return None + + data_sources: Dict[str, "PowerBiAPI.DataSource"] = {} + LOGGER.debug("data-sources = {}".format(value)) + for datasource_dict in value: + # Create datasource instance with basic detail available + datasource = PowerBiAPI.DataSource( + id=datasource_dict.get( + "datasourceId" + ), # datasourceId is not available in all cases + type=datasource_dict["datasourceType"], + raw_connection_detail=datasource_dict["connectionDetails"], + ) + + data_sources[datasource.id] = datasource + + return data_sources + + def get_tiles(self, workspace: Workspace, dashboard: Dashboard) -> List[Tile]: + + """ + Get the list of tiles from PowerBi for the given workspace identifier + + TODO: Pagination. As per REST API doc (https://docs.microsoft.com/en-us/rest/api/power-bi/dashboards/get + -tiles), there is no information available on pagination + + """ + + def new_dataset_or_report(tile_instance: Any) -> dict: + """ + Find out which is the data source for tile. 
It is either REPORT or DATASET + """ + report_fields = { + "dataset": ( + workspace.datasets[tile_instance.get("datasetId")] + if tile_instance.get("datasetId") is not None + else None + ), + "report": ( + self.__get_report( + workspace_id=workspace.id, + report_id=tile_instance.get("reportId"), + ) + if tile_instance.get("reportId") is not None + else None + ), + "createdFrom": PowerBiAPI.Tile.CreatedFrom.UNKNOWN, + } + + # Tile is either created from report or dataset or from custom visualization + if report_fields["report"] is not None: + report_fields["createdFrom"] = PowerBiAPI.Tile.CreatedFrom.REPORT + elif report_fields["dataset"] is not None: + report_fields["createdFrom"] = PowerBiAPI.Tile.CreatedFrom.DATASET + else: + report_fields["createdFrom"] = PowerBiAPI.Tile.CreatedFrom.VISUALIZATION + + LOGGER.info( + "Tile %s(%s) is created from %s", + tile_instance.get("title"), + tile_instance.get("id"), + report_fields["createdFrom"], + ) + + return report_fields + + tile_list_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.TILE_LIST] + # Replace place holders + tile_list_endpoint = tile_list_endpoint.format( + POWERBI_BASE_URL=PowerBiAPI.BASE_URL, + WORKSPACE_ID=dashboard.workspace_id, + DASHBOARD_ID=dashboard.id, + ) + # Hit PowerBi + LOGGER.info("Request to URL={}".format(tile_list_endpoint)) + response = requests.get( + tile_list_endpoint, + headers={Constant.Authorization: self.get_access_token()}, + ) + + # Check if we got response from PowerBi + if response.status_code != 200: + LOGGER.warning("Failed to fetch tiles list from power-bi for") + LOGGER.warning("{}={}".format(Constant.WorkspaceId, workspace.id)) + LOGGER.warning("{}={}".format(Constant.DashboardId, dashboard.id)) + raise ConnectionError("Failed to fetch the tile list from the power-bi") + + # Iterate through response and create a list of PowerBiAPI.Dashboard + tile_dict: List[Any] = response.json()[Constant.VALUE] + LOGGER.debug("Tile Dict = {}".format(tile_dict)) + tiles: List[PowerBiAPI.Tile] = [ + PowerBiAPI.Tile( + id=instance.get("id"), + title=instance.get("title"), + embedUrl=instance.get("embedUrl"), + **new_dataset_or_report(instance), + ) + for instance in tile_dict + if instance is not None + ] + + return tiles + + def get_pages_by_report( + self, workspace_id: str, report_id: str + ) -> List["PowerBiAPI.Page"]: + """ + Fetch the report from PowerBi for the given report identifier + """ + if workspace_id is None or report_id is None: + LOGGER.info("workspace_id or report_id is None") + return [] + + pages_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.PAGE_BY_REPORT] + # Replace place holders + pages_endpoint = pages_endpoint.format( + POWERBI_BASE_URL=PowerBiAPI.BASE_URL, + WORKSPACE_ID=workspace_id, + REPORT_ID=report_id, + ) + # Hit PowerBi + LOGGER.info(f"Request to pages URL={pages_endpoint}") + response = requests.get( + pages_endpoint, + headers={Constant.Authorization: self.get_access_token()}, + ) + + # Check if we got response from PowerBi + if response.status_code != 200: + message: str = "Failed to fetch reports from power-bi for" + LOGGER.warning(message) + LOGGER.warning(f"{Constant.WorkspaceId}={workspace_id}") + raise ConnectionError(message) + + response_dict = response.json() + return [ + PowerBiAPI.Page( + id="{}.{}".format(report_id, raw_instance["name"].replace(" ", "_")), + name=raw_instance["name"], + displayName=raw_instance.get("displayName"), + order=raw_instance.get("order"), + ) + for raw_instance in response_dict["value"] + ] + + def get_reports( + self, workspace: 
"PowerBiAPI.Workspace" + ) -> List["PowerBiAPI.Report"]: + """ + Fetch the report from PowerBi for the given report identifier + """ + if workspace is None: + LOGGER.info("workspace is None") + return [] + + report_list_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.REPORT_LIST] + # Replace place holders + report_list_endpoint = report_list_endpoint.format( + POWERBI_BASE_URL=PowerBiAPI.BASE_URL, + WORKSPACE_ID=workspace.id, + ) + # Hit PowerBi + LOGGER.info(f"Request to report URL={report_list_endpoint}") + response = requests.get( + report_list_endpoint, + headers={Constant.Authorization: self.get_access_token()}, + ) + + # Check if we got response from PowerBi + if response.status_code != 200: + message: str = "Failed to fetch reports from power-bi for" + LOGGER.warning(message) + LOGGER.warning(f"{Constant.WorkspaceId}={workspace.id}") + raise ConnectionError(message) + + response_dict = response.json() + reports: List["PowerBiAPI.Report"] = [ + PowerBiAPI.Report( + id=raw_instance["id"], + name=raw_instance.get("name"), + webUrl=raw_instance.get("webUrl"), + embedUrl=raw_instance.get("embedUrl"), + description=raw_instance.get("description"), + pages=self.get_pages_by_report( + workspace_id=workspace.id, report_id=raw_instance["id"] + ), + users=self.__get_users( + workspace_id=workspace.id, entity="reports", _id=raw_instance["id"] + ), + dataset=workspace.datasets.get(raw_instance.get("datasetId")), + ) + for raw_instance in response_dict["value"] + ] + + return reports + + # flake8: noqa: C901 + def get_workspace( + self, workspace_id: str, reporter: PowerBiDashboardSourceReport + ) -> Workspace: + """ + Return Workspace for the given workspace identifier i.e. workspace_id + """ + scan_create_endpoint = PowerBiAPI.API_ENDPOINTS[Constant.SCAN_CREATE] + scan_create_endpoint = scan_create_endpoint.format( + POWERBI_ADMIN_BASE_URL=PowerBiAPI.ADMIN_BASE_URL + ) + + def create_scan_job(): + """ + Create scan job on PowerBi for the workspace + """ + request_body = {"workspaces": [workspace_id]} + + res = requests.post( + scan_create_endpoint, + data=request_body, + params={ + "datasetExpressions": True, + "datasetSchema": True, + "datasourceDetails": True, + "getArtifactUsers": True, + "lineage": True, + }, + headers={Constant.Authorization: self.get_access_token()}, + ) + + if res.status_code not in (200, 202): + message = f"API({scan_create_endpoint}) return error code {res.status_code} for workspace id({workspace_id})" + + LOGGER.warning(message) + + raise ConnectionError(message) + # Return Id of Scan created for the given workspace + id = res.json()["id"] + LOGGER.info("Scan id({})".format(id)) + return id + + def wait_for_scan_to_complete(scan_id: str, timeout: int) -> Any: + """ + Poll the PowerBi service for workspace scan to complete + """ + minimum_sleep = 3 + if timeout < minimum_sleep: + LOGGER.info( + f"Setting timeout to minimum_sleep time {minimum_sleep} seconds" + ) + timeout = minimum_sleep + + max_trial = timeout // minimum_sleep + LOGGER.info(f"Max trial {max_trial}") + scan_get_endpoint = PowerBiAPI.API_ENDPOINTS[Constant.SCAN_GET] + scan_get_endpoint = scan_get_endpoint.format( + POWERBI_ADMIN_BASE_URL=PowerBiAPI.ADMIN_BASE_URL, SCAN_ID=scan_id + ) + + LOGGER.info(f"Hitting URL={scan_get_endpoint}") + + trail = 1 + while True: + LOGGER.info(f"Trial = {trail}") + res = requests.get( + scan_get_endpoint, + headers={Constant.Authorization: self.get_access_token()}, + ) + if res.status_code != 200: + message = f"API({scan_get_endpoint}) return error code 
{res.status_code} for scan id({scan_id})" + + LOGGER.warning(message) + + raise ConnectionError(message) + + if res.json()["status"].upper() == "Succeeded".upper(): + LOGGER.info(f"Scan result is available for scan id({scan_id})") + return True + + if trail == max_trial: + break + LOGGER.info(f"Sleeping for {minimum_sleep} seconds") + sleep(minimum_sleep) + trail += 1 + + # Result is not available + return False + + def get_scan_result(scan_id: str) -> dict: + LOGGER.info("Fetching scan result") + LOGGER.info(f"{Constant.SCAN_ID}={scan_id}") + scan_result_get_endpoint = PowerBiAPI.API_ENDPOINTS[ + Constant.SCAN_RESULT_GET + ] + scan_result_get_endpoint = scan_result_get_endpoint.format( + POWERBI_ADMIN_BASE_URL=PowerBiAPI.ADMIN_BASE_URL, SCAN_ID=scan_id + ) + + LOGGER.info(f"Hitting URL={scan_result_get_endpoint}") + res = requests.get( + scan_result_get_endpoint, + headers={Constant.Authorization: self.get_access_token()}, + ) + if res.status_code != 200: + message = f"API({scan_result_get_endpoint}) return error code {res.status_code} for scan id({scan_id})" + + LOGGER.warning(message) + + raise ConnectionError(message) + + return res.json()["workspaces"][0] + + def json_to_dataset_map(scan_result: dict) -> dict: + """ + Filter out "dataset" from scan_result and return PowerBiAPI.Dataset instance set + """ + datasets: Optional[Any] = scan_result.get("datasets") + dataset_map: dict = {} + + if datasets is None or len(datasets) == 0: + LOGGER.warning( + f'Workspace {scan_result["name"]}({scan_result["id"]}) does not have datasets' + ) + + LOGGER.info("Returning empty datasets") + return dataset_map + + for dataset_dict in datasets: + dataset_instance: PowerBiAPI.PowerBIDataset = self.get_dataset( + workspace_id=scan_result["id"], + dataset_id=dataset_dict["id"], + ) + dataset_map[dataset_instance.id] = dataset_instance + # set dataset-name + dataset_name: str = ( + dataset_instance.name + if dataset_instance.name is not None + else dataset_instance.id + ) + + for table in dataset_dict["tables"]: + expression: str = ( + table["source"][0]["expression"] + if table.get("source") is not None and len(table["source"]) > 0 + else None + ) + dataset_instance.tables.append( + PowerBiAPI.Table( + name=table["name"], + full_name="{}.{}".format( + dataset_name.replace(" ", "_"), + table["name"].replace(" ", "_"), + ), + expression=expression, + ) + ) + + return dataset_map + + def init_dashboard_tiles(workspace: PowerBiAPI.Workspace) -> None: + for dashboard in workspace.dashboards: + dashboard.tiles = self.get_tiles(workspace, dashboard=dashboard) + + return None + + LOGGER.info("Creating scan job for workspace") + LOGGER.info("{}={}".format(Constant.WorkspaceId, workspace_id)) + LOGGER.info("Hitting URL={}".format(scan_create_endpoint)) + scan_id = create_scan_job() + LOGGER.info("Waiting for scan to complete") + if ( + wait_for_scan_to_complete( + scan_id=scan_id, timeout=self.__config.scan_timeout + ) + is False + ): + raise ValueError( + "Workspace detail is not available. Please increase scan_timeout to wait." 
+ ) + + # Scan is complete lets take the result + scan_result = get_scan_result(scan_id=scan_id) + + LOGGER.debug(f"scan result = %s", json.dumps(scan_result, indent=1)) + workspace = PowerBiAPI.Workspace( + id=scan_result["id"], + name=scan_result["name"], + state=scan_result["state"], + datasets={}, + dashboards=[], + ) + # Get workspace dashboards + workspace.dashboards = self.get_dashboards(workspace) + + workspace.datasets = json_to_dataset_map(scan_result) + init_dashboard_tiles(workspace) + + return workspace From 61c1d2def810ba4c60ef29b7b9b307d7e49eb4e8 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 12 Dec 2022 21:58:07 +0530 Subject: [PATCH 20/53] mssql server support --- .../ingestion/source/powerbi/m_parser.py | 23 ++++++++++- .../integration/powerbi/test_m_parser.py | 39 ++++++++++++++----- 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index 0b40639f8aadf..b12f804c00b73 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -26,6 +26,7 @@ class SupportedDataPlatform(Enum): POSTGRES_SQL = "PostgreSQL" ORACLE = "Oracle" SNOWFLAKE = "Snowflake" + MS_SQL = "Sql" POWERBI_TO_DATAHUB_DATA_PLATFORM_MAPPING: Dict[str, str] = { @@ -252,7 +253,16 @@ def get_full_table_name(self, output_variable: str) -> Optional[str]: pass -class PostgresMQueryResolver(BaseMQueryResolver): +class DefaultTwoStepDataAccessSources(BaseMQueryResolver, ABC): + """ + These are the DataSource for which PowerBI Desktop generates default M-Query of following pattern + let + Source = Sql.Database("localhost", "library"), + dbo_book_issue = Source{[Schema="dbo",Item="book_issue"]}[Data] + in + dbo_book_issue + """ + def get_full_table_name(self, output_variable: str) -> Optional[str]: variable_statement: Optional[Tree] = _get_variable_statement( self.parse_tree, output_variable @@ -292,10 +302,17 @@ def get_full_table_name(self, output_variable: str) -> Optional[str]: database_name: str = cast(List[str], arg_list)[1] # 1st token is database name return cast(Optional[str], f"{database_name}.{schema_name}.{table_name}") + +class PostgresMQueryResolver(DefaultTwoStepDataAccessSources): def get_platform(self) -> str: return SupportedDataPlatform.POSTGRES_SQL.value +class MSSqlMQueryResolver(DefaultTwoStepDataAccessSources): + def get_platform(self) -> str: + return SupportedDataPlatform.MS_SQL.value + + class OracleMQueryResolver(BaseMQueryResolver): def get_platform(self) -> str: return SupportedDataPlatform.ORACLE.value @@ -474,6 +491,7 @@ def _get_resolver(parse_tree: Tree) -> Optional[Type["BaseMQueryResolver"]]: f"{SupportedDataPlatform.POSTGRES_SQL.value}.Database": PostgresMQueryResolver, f"{SupportedDataPlatform.ORACLE.value}.Database": OracleMQueryResolver, f"{SupportedDataPlatform.SNOWFLAKE.value}.Databases": SnowflakeMQueryResolver, + f"{SupportedDataPlatform.MS_SQL.value}.Database": MSSqlMQueryResolver, } # type :ignore @@ -506,7 +524,8 @@ def get_upstream_tables( try: parse_tree: Tree = _parse_expression(table.expression) - except lark.exceptions.UnexpectedCharacters: + except lark.exceptions.UnexpectedCharacters as e: + LOGGER.debug(f"Fail to parse expression {table.expression}", exc_info=e) reporter.report_warning( table.full_name, f"UnSupported expression = {table.expression}" ) diff --git 
a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 3b5f9dbd515c3..4a0c0bcf17f7b 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -24,8 +24,9 @@ "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select *,#(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS Agent,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Industries'#(lf)and TARGET_TEAM = 'Enterprise'\", null, [EnableFolding=true])\nin\n Source", 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select#(lf)*,#(lf)concat((UPPER(REPLACE(SALES_SPECIALIST,\'-\',\'\'))),#(lf)LEFT(CAST(INVOICE_DATE AS DATE),4)+LEFT(RIGHT(CAST(INVOICE_DATE AS DATE),5),2)) AS AGENT_KEY,#(lf)CASE#(lf) WHEN CLASS = \'Software\' and (NOT(PRODUCT in (\'ADV\', \'Adv\') and left(ACCOUNT_ID,2)=\'10\') #(lf) or V_ENTERPRISE_INVOICED_REVENUE.TYPE = \'Manual Adjustment\') THEN INVOICE_AMOUNT#(lf) WHEN V_ENTERPRISE_INVOICED_REVENUE.TYPE IN (\'Recurring\',\'0\') THEN INVOICE_AMOUNT#(lf) ELSE 0#(lf)END as SOFTWARE_INV#(lf)#(lf)from V_ENTERPRISE_INVOICED_REVENUE", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Services", each if [CLASS] = "Services" then [INVOICE_AMOUNT] else 0),\n #"Added Custom" = Table.AddColumn(#"Added Conditional Column", "Advanced New Sites", each if [PRODUCT] = "ADV"\nor [PRODUCT] = "Adv"\nthen [NEW_SITE]\nelse 0)\nin\n #"Added Custom"', 'let\n Source = Snowflake.Databases("xaa48144.snowflakecomputing.com","GSL_TEST_WH",[Role="ACCOUNTADMIN"]),\n Source2 = PostgreSQL.Database("localhost", "mics"),\n public_order_date = Source2{[Schema="public",Item="order_date"]}[Data],\n GSL_TEST_DB_Database = Source{[Name="GSL_TEST_DB",Kind="Database"]}[Data],\n PUBLIC_Schema = GSL_TEST_DB_Database{[Name="PUBLIC",Kind="Schema"]}[Data],\n SALES_ANALYST_VIEW_View = PUBLIC_Schema{[Name="SALES_ANALYST_VIEW",Kind="View"]}[Data],\n two_source_table = Table.Combine({public_order_date, SALES_ANALYST_VIEW_View})\n in\n two_source_table', - 'let\n Source = PostgreSQL.Database("localhost" , "mics" ),\n public_order_date = Source{[Schema="public",Item="order_date"]}[Data] \n in \n public_order_date', - 'let\n Source = Oracle.Database("localhost:1521/salesdb.GSLAB.COM", [HierarchicalNavigation=true]), HR = Source{[Schema="HR"]}[Data], EMPLOYEES1 = HR{[Name="EMPLOYEES"]}[Data] \n in EMPLOYEES1', + 'let\n Source = PostgreSQL.Database("localhost" , "mics" ),\n public_order_date = Source{[Schema="public",Item="order_date"]}[Data] \n in \n public_order_date', + 'let\n Source = Oracle.Database("localhost:1521/salesdb.GSLAB.COM", [HierarchicalNavigation=true]), HR = Source{[Schema="HR"]}[Data], EMPLOYEES1 = HR{[Name="EMPLOYEES"]}[Data] \n in EMPLOYEES1', + 'let\n Source = Sql.Database("localhost", "library"),\n dbo_book_issue = Source{[Schema="dbo",Item="book_issue"]}[Data]\n in dbo_book_issue', ] @@ -107,6 +108,27 @@ def test_parse_m_query13(): assert m_parser._get_output_variable(parse_tree) == "two_source_table" +def test_snowflake_regular_case(): + q: str = M_QUERIES[0] + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=q, + 
name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( + table, reporter + ) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == "TESTTABLE" + assert data_platform_tables[0].full_name == "PBI_TEST.TEST.TESTTABLE" + assert ( + data_platform_tables[0].platform_type == SupportedDataPlatform.SNOWFLAKE.value + ) + + def test_postgres_regular_case(): q: str = M_QUERIES[13] table: PowerBiAPI.Table = PowerBiAPI.Table( @@ -148,8 +170,8 @@ def test_oracle_regular_case(): assert data_platform_tables[0].platform_type == SupportedDataPlatform.ORACLE.value -def test_snowflake_regular_case(): - q: str = M_QUERIES[0] +def test_mssql_regular_case(): + q: str = M_QUERIES[15] table: PowerBiAPI.Table = PowerBiAPI.Table( expression=q, name="virtual_order_table", @@ -157,13 +179,12 @@ def test_snowflake_regular_case(): ) reporter = PowerBiDashboardSourceReport() + data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( table, reporter ) assert len(data_platform_tables) == 1 - assert data_platform_tables[0].name == "TESTTABLE" - assert data_platform_tables[0].full_name == "PBI_TEST.TEST.TESTTABLE" - assert ( - data_platform_tables[0].platform_type == SupportedDataPlatform.SNOWFLAKE.value - ) + assert data_platform_tables[0].name == "book_issue" + assert data_platform_tables[0].full_name == "library.dbo.book_issue" + assert data_platform_tables[0].platform_type == SupportedDataPlatform.MS_SQL.value From aad6f2946adaaa1623065914aab849b08626ee30 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 14 Dec 2022 15:58:31 +0530 Subject: [PATCH 21/53] WIP --- .../powerbi/{m_parser.py => m_parser2.py} | 27 ++ .../integration/powerbi/test_m_parser.py | 316 +++++++++--------- 2 files changed, 190 insertions(+), 153 deletions(-) rename metadata-ingestion/src/datahub/ingestion/source/powerbi/{m_parser.py => m_parser2.py} (94%) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser2.py similarity index 94% rename from metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py rename to metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser2.py index b12f804c00b73..2979e181ca248 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser2.py @@ -515,6 +515,31 @@ def _parse_expression(expression: str) -> Tree: return parse_tree +def _validate_parse_tree(supported_funcs: List[str], tree: Tree) -> Tuple[bool, str]: + """ + :param tree: tree to validate as per functions supported by m_parser module + :return: first argument is False if validation is failed and second argument would contain the error message. + in-case of valid tree the first argument is True and second argument would be None. 
+ """ + _filter: List[Tree] = tree.find_data("invoke_expression") + + valid: bool = False + message: Optional[str] = None + + for node in _filter: + primary_expression_node: Optional[Tree] = _get_first_rule(node, "primary_expression") + if primary_expression_node is None: + continue + identifier_node: Optional[Tree] = _get_first_rule(primary_expression_node, "identifier") + if identifier_node is None: + continue + + function_name: str = _make_function_name(identifier_node) + # This function should be in our supported function list + if function_name not in supported_funcs: + return False, f"function {function_name} is not supported" + + def get_upstream_tables( table: PowerBiAPI.Table, reporter: PowerBiDashboardSourceReport ) -> List[DataPlatformTable]: @@ -524,6 +549,8 @@ def get_upstream_tables( try: parse_tree: Tree = _parse_expression(table.expression) + _validate_parse_tree([], parse_tree) + exit() except lark.exceptions.UnexpectedCharacters as e: LOGGER.debug(f"Fail to parse expression {table.expression}", exc_info=e) reporter.report_warning( diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 4a0c0bcf17f7b..917532b783976 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -13,7 +13,7 @@ M_QUERIES = [ 'let\n Source = Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table', 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "SELECT#(lf)concat((UPPER(REPLACE(SELLER,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", null, [EnableFolding=true]),\n #"ADDed Conditional Column" = Table.AddColumn(Source, "SME Units ENT", each if [DEAL_TYPE] = "SME Unit" then [UNIT] else 0),\n #"Added Conditional Column1" = Table.AddColumn(#"Added Conditional Column", "Banklink Units", each if [DEAL_TYPE] = "Banklink" then [UNIT] else 0),\n #"Removed Columns" = Table.RemoveColumns(#"Added Conditional Column1",{"Banklink Units"}),\n #"Added Custom" = Table.AddColumn(#"Removed Columns", "Banklink Units", each if [DEAL_TYPE] = "Banklink" and [SALES_TYPE] = "3 - Upsell"\nthen [UNIT]\n\nelse if [SALES_TYPE] = "Adjusted BL Migration"\nthen [UNIT]\n\nelse 0),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "SME Units in $ (*$361)", each if [DEAL_TYPE] = "SME Unit" \nand [SALES_TYPE] <> "4 - Renewal"\n then [UNIT] * 361\nelse 0),\n #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Banklink in $ (*$148)", each [Banklink Units] * 148)\nin\n #"Added Custom2"', - 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "select #(lf)UPPER(REPLACE(AGENT_NAME,\'-\',\'\')) AS 
Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = \'Accounting\'#(lf)and TARGET_TEAM = \'Enterprise\'", null, [EnableFolding=true]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Has PS Software Quota?", each if [TIER] = "Expansion (Medium)" then "Yes" else if [TIER] = "Acquisition" then "Yes" else "No")\nin\n #"Added Conditional Column"', + 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20 658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "select #(lf)UPPER(REPLACE(AGENT_NAME,\'-\',\'\')) AS Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = \'Accounting\'#(lf)and TARGET_TEAM = \'Enterprise\'", null, [EnableFolding=true]),\n #"Added Conditional Column" = Table.AddColumn(Source, "Has PS Software Quota?", each if [TIER] = "Expansion (Medium)" then "Yes" else if [TIER] = "Acquisition" then "Yes" else "No")\nin\n #"Added Conditional Column"', 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *#(lf),concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), MONTHID) as AGENT_KEY#(lf),concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY#(lf)#(lf)from V_OIP_ENT_2022"]),\n #"Added Custom" = Table.AddColumn(Source, "OIP in $(*$350)", each [SALES_INVOICE_AMOUNT] * 350),\n #"Changed Type" = Table.TransformColumnTypes(#"Added Custom",{{"OIP in $(*$350)", type number}})\nin\n #"Changed Type"', 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="Select *,#(lf)#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_QUOTED,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), #(lf)LEFT(CAST(DTE AS DATE),4)+LEFT(RIGHT(CAST(DTE AS DATE),5),2)) AS CD_AGENT_KEY#(lf)#(lf)from V_INVOICE_BOOKING_2022"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"CLIENT_ID", Int64.Type}}),\n #"Added Conditional Column" = Table.AddColumn(#"Changed Type", "PS Software (One-Off)", each if Text.Contains([REVENUE_TYPE], "Software") then [Inv_Amt] else if Text.Contains([REVENUE_TYPE], "Tax Seminar") then [Inv_Amt] else 0),\n #"Filtered Rows" = Table.SelectRows(#"Added Conditional Column", each true),\n #"Duplicated Column" = Table.DuplicateColumn(#"Filtered Rows", "CLIENT_ID", "CLIENT_ID - Copy"),\n #"Changed Type1" = Table.TransformColumnTypes(#"Duplicated Column",{{"CLIENT_ID - Copy", type text}}),\n #"Renamed Columns" = Table.RenameColumns(#"Changed Type1",{{"CLIENT_ID - Copy", "CLIENT_ID for Filter"}})\nin\n #"Renamed Columns"', 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="SELECT 
*,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), #(lf)LEFT(CAST(MONTH_DATE AS DATE),4)+LEFT(RIGHT(CAST(MONTH_DATE AS DATE),5),2)) AS AGENT_KEY#(lf)#(lf)FROM dbo.V_ARR_ADDS"]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"MONTH_DATE", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([MONTH_DATE]))\nin\n #"Added Custom"', @@ -30,161 +30,171 @@ ] -def test_parse_m_query1(): - expression: str = M_QUERIES[0] - parse_tree: Tree = m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == "TESTTABLE_Table" +# def test_parse_m_query1(): +# expression: str = M_QUERIES[0] +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == "TESTTABLE_Table" +# +# +# def test_parse_m_query2(): +# expression: str = M_QUERIES[1] +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Added Custom2"' +# +# +# def test_parse_m_query3(): +# expression: str = M_QUERIES[2] +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Added Conditional Column"' +# +# +# def test_parse_m_query4(): +# expression: str = M_QUERIES[3] +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Changed Type"' +# +# +# def test_parse_m_query5(): +# expression: str = M_QUERIES[4] +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Renamed Columns"' +# +# +# def test_parse_m_query6(): +# expression: str = M_QUERIES[5] +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Added Custom"' +# +# +# def test_parse_m_query7(): +# expression: str = M_QUERIES[6] +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == "Source" +# +# +# def test_parse_m_query8(): +# expression: str = M_QUERIES[7] +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Added Custom1"' +# +# +# def test_parse_m_query9(): +# expression: str = M_QUERIES[8] +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Added Custom1"' +# +# +# def test_parse_m_query10(): +# expression: str = M_QUERIES[9] +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Changed Type1"' +# +# +# def test_parse_m_query11(): +# expression: str = M_QUERIES[10] +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == "Source" +# +# +# def test_parse_m_query12(): +# expression: str = M_QUERIES[11] +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == '"Added Custom"' +# +# +# def test_parse_m_query13(): +# expression: str = M_QUERIES[12] +# parse_tree: Tree = m_parser._parse_expression(expression) +# assert m_parser._get_output_variable(parse_tree) == "two_source_table" +# +# +# def test_snowflake_regular_case(): +# q: str = M_QUERIES[0] +# table: PowerBiAPI.Table = PowerBiAPI.Table( +# expression=q, +# name="virtual_order_table", +# full_name="OrderDataSet.virtual_order_table", +# ) +# +# reporter = PowerBiDashboardSourceReport() +# data_platform_tables: 
List[DataPlatformTable] = m_parser.get_upstream_tables( +# table, reporter +# ) +# +# assert len(data_platform_tables) == 1 +# assert data_platform_tables[0].name == "TESTTABLE" +# assert data_platform_tables[0].full_name == "PBI_TEST.TEST.TESTTABLE" +# assert ( +# data_platform_tables[0].platform_type == SupportedDataPlatform.SNOWFLAKE.value +# ) +# +# +# def test_postgres_regular_case(): +# q: str = M_QUERIES[13] +# table: PowerBiAPI.Table = PowerBiAPI.Table( +# expression=q, +# name="virtual_order_table", +# full_name="OrderDataSet.virtual_order_table", +# ) +# +# reporter = PowerBiDashboardSourceReport() +# data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( +# table, reporter +# ) +# +# assert len(data_platform_tables) == 1 +# assert data_platform_tables[0].name == "order_date" +# assert data_platform_tables[0].full_name == "mics.public.order_date" +# assert ( +# data_platform_tables[0].platform_type +# == SupportedDataPlatform.POSTGRES_SQL.value +# ) +# +# +# def test_oracle_regular_case(): +# q: str = M_QUERIES[14] +# table: PowerBiAPI.Table = PowerBiAPI.Table( +# expression=q, +# name="virtual_order_table", +# full_name="OrderDataSet.virtual_order_table", +# ) +# +# reporter = PowerBiDashboardSourceReport() +# data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( +# table, reporter +# ) +# +# assert len(data_platform_tables) == 1 +# assert data_platform_tables[0].name == "EMPLOYEES" +# assert data_platform_tables[0].full_name == "salesdb.HR.EMPLOYEES" +# assert data_platform_tables[0].platform_type == SupportedDataPlatform.ORACLE.value +# +# +# def test_mssql_regular_case(): +# q: str = M_QUERIES[15] +# table: PowerBiAPI.Table = PowerBiAPI.Table( +# expression=q, +# name="virtual_order_table", +# full_name="OrderDataSet.virtual_order_table", +# ) +# +# reporter = PowerBiDashboardSourceReport() +# +# data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( +# table, reporter +# ) +# +# assert len(data_platform_tables) == 1 +# assert data_platform_tables[0].name == "book_issue" +# assert data_platform_tables[0].full_name == "library.dbo.book_issue" +# assert data_platform_tables[0].platform_type == SupportedDataPlatform.MS_SQL.value + +def test_advance_use_case(): - -def test_parse_m_query2(): - expression: str = M_QUERIES[1] - parse_tree: Tree = m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == '"Added Custom2"' - - -def test_parse_m_query3(): - expression: str = M_QUERIES[2] - parse_tree: Tree = m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == '"Added Conditional Column"' - - -def test_parse_m_query4(): - expression: str = M_QUERIES[3] - parse_tree: Tree = m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == '"Changed Type"' - - -def test_parse_m_query5(): - expression: str = M_QUERIES[4] - parse_tree: Tree = m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == '"Renamed Columns"' - - -def test_parse_m_query6(): - expression: str = M_QUERIES[5] - parse_tree: Tree = m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == '"Added Custom"' - - -def test_parse_m_query7(): - expression: str = M_QUERIES[6] - parse_tree: Tree = m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == "Source" - - -def test_parse_m_query8(): - expression: str = M_QUERIES[7] - parse_tree: Tree = 
m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == '"Added Custom1"' - - -def test_parse_m_query9(): - expression: str = M_QUERIES[8] - parse_tree: Tree = m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == '"Added Custom1"' - - -def test_parse_m_query10(): - expression: str = M_QUERIES[9] - parse_tree: Tree = m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == '"Changed Type1"' - - -def test_parse_m_query11(): - expression: str = M_QUERIES[10] - parse_tree: Tree = m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == "Source" - - -def test_parse_m_query12(): - expression: str = M_QUERIES[11] - parse_tree: Tree = m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == '"Added Custom"' - - -def test_parse_m_query13(): - expression: str = M_QUERIES[12] - parse_tree: Tree = m_parser._parse_expression(expression) - assert m_parser._get_output_variable(parse_tree) == "two_source_table" - - -def test_snowflake_regular_case(): - q: str = M_QUERIES[0] table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=q, + expression=M_QUERIES[1], name="virtual_order_table", full_name="OrderDataSet.virtual_order_table", ) + m_parser.get_upstream_tables(table, PowerBiDashboardSourceReport()) - reporter = PowerBiDashboardSourceReport() - data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( - table, reporter - ) - - assert len(data_platform_tables) == 1 - assert data_platform_tables[0].name == "TESTTABLE" - assert data_platform_tables[0].full_name == "PBI_TEST.TEST.TESTTABLE" - assert ( - data_platform_tables[0].platform_type == SupportedDataPlatform.SNOWFLAKE.value - ) - - -def test_postgres_regular_case(): - q: str = M_QUERIES[13] - table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=q, - name="virtual_order_table", - full_name="OrderDataSet.virtual_order_table", - ) - - reporter = PowerBiDashboardSourceReport() - data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( - table, reporter - ) - - assert len(data_platform_tables) == 1 - assert data_platform_tables[0].name == "order_date" - assert data_platform_tables[0].full_name == "mics.public.order_date" - assert ( - data_platform_tables[0].platform_type - == SupportedDataPlatform.POSTGRES_SQL.value - ) - - -def test_oracle_regular_case(): - q: str = M_QUERIES[14] - table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=q, - name="virtual_order_table", - full_name="OrderDataSet.virtual_order_table", - ) - - reporter = PowerBiDashboardSourceReport() - data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( - table, reporter - ) - - assert len(data_platform_tables) == 1 - assert data_platform_tables[0].name == "EMPLOYEES" - assert data_platform_tables[0].full_name == "salesdb.HR.EMPLOYEES" - assert data_platform_tables[0].platform_type == SupportedDataPlatform.ORACLE.value - - -def test_mssql_regular_case(): - q: str = M_QUERIES[15] - table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=q, - name="virtual_order_table", - full_name="OrderDataSet.virtual_order_table", - ) - - reporter = PowerBiDashboardSourceReport() - - data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( - table, reporter - ) - - assert len(data_platform_tables) == 1 - assert data_platform_tables[0].name == "book_issue" - assert data_platform_tables[0].full_name == "library.dbo.book_issue" - 
assert data_platform_tables[0].platform_type == SupportedDataPlatform.MS_SQL.value From 33a31506bb603753d8b27f346d0468575d534964 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 14 Dec 2022 22:32:28 +0530 Subject: [PATCH 22/53] mssql key --- .../src/datahub/ingestion/source/powerbi/m_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py index b12f804c00b73..41ccdeffd1940 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser.py @@ -33,6 +33,7 @@ class SupportedDataPlatform(Enum): SupportedDataPlatform.POSTGRES_SQL.value: "postgres", SupportedDataPlatform.ORACLE.value: "oracle", SupportedDataPlatform.SNOWFLAKE.value: "snowflake", + SupportedDataPlatform.MS_SQL.value: "mssql", } From aecb69585310be774895d2f81ef2b1dda6cc8aa1 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Thu, 15 Dec 2022 17:12:51 +0530 Subject: [PATCH 23/53] WIP --- .../ingestion/source/powerbi/config.py | 9 +- .../source/powerbi/m_query/__init__.py | 0 .../source/powerbi/m_query/parser.py | 67 +++ .../source/powerbi/m_query/resolver.py | 406 ++++++++++++++++++ .../source/powerbi/m_query/tree_function.py | 143 ++++++ .../source/powerbi/m_query/validator.py | 43 ++ .../ingestion/source/powerbi/powerbi.py | 27 +- .../integration/powerbi/test_m_parser.py | 329 +++++++------- 8 files changed, 845 insertions(+), 179 deletions(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/__init__.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index db0f5858d997e..5d6c3dc0529d7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -7,7 +7,6 @@ import datahub.emitter.mce_builder as builder from datahub.configuration.source_common import DEFAULT_ENV, EnvBasedSourceConfigBase from datahub.ingestion.api.source import SourceReport -from datahub.ingestion.source.powerbi import m_parser class Constant: @@ -130,16 +129,14 @@ class PowerBiAPIConfig(EnvBasedSourceConfigBase): @validator("dataset_type_mapping") @classmethod - def check_dataset_type_mapping(cls, value): - # For backward compatibility map input PostgreSql to PostgreSQL + def map_data_platform(cls, value): + # For backward compatibility convert input PostgreSql to PostgreSQL + # PostgreSQL is name of the data-platform in M-Query if "PostgreSql" in value.keys(): platform_name = value["PostgreSql"] del value["PostgreSql"] value["PostgreSQL"] = platform_name - for key in value.keys(): - if key not in m_parser.POWERBI_TO_DATAHUB_DATA_PLATFORM_MAPPING.keys(): - raise ValueError(f"DataPlatform {key} is not supported") return value diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/__init__.py new file mode 100644 index 
0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py new file mode 100644 index 0000000000000..aa929b475529a --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py @@ -0,0 +1,67 @@ +import importlib.resources as pkg_resource +import logging +from typing import List, Optional + +import lark +from lark import Lark, Tree + +from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport +from datahub.ingestion.source.powerbi.proxy import PowerBiAPI +from datahub.ingestion.source.powerbi.m_query import validator +from datahub.ingestion.source.powerbi.m_query import resolver + +LOGGER = logging.getLogger(__name__) + + +def _parse_expression(expression: str) -> Tree: + # Read lexical grammar as text + grammar: str = pkg_resource.read_text( + "datahub.ingestion.source.powerbi", "powerbi-lexical-grammar.rule" + ) + + # Create lark parser for the grammar text + lark_parser = Lark(grammar, start="let_expression", regex=True) + + parse_tree: Tree = lark_parser.parse(expression) + + LOGGER.debug("Parse Tree") + if ( + LOGGER.level == logging.DEBUG + ): # Guard condition to avoid heavy pretty() function call + LOGGER.debug(parse_tree.pretty()) + + return parse_tree + + +def get_upstream_tables( + table: PowerBiAPI.Table, reporter: PowerBiDashboardSourceReport +) -> List[resolver.DataPlatformTable]: + if table.expression is None: + reporter.report_warning(table.full_name, "Expression is none") + return [] + + try: + parse_tree: Tree = _parse_expression(table.expression) + except lark.exceptions.UnexpectedCharacters as e: + LOGGER.debug(f"Fail to parse expression {table.expression}", exc_info=e) + reporter.report_warning( + table.full_name, f"UnSupported expression = {table.expression}" + ) + return [] + + resolver_enum: Optional[resolver.SupportedDataPlatform] = resolver.get_resolver(parse_tree) + if resolver_enum is None: + LOGGER.debug("Table full-name = %s", table.full_name) + LOGGER.debug("Expression = %s", table.expression) + reporter.report_warning( + table.full_name, + f"{table.full_name} M-Query resolver not found for the table expression", + ) + return [] + + return resolver_enum.get_m_query_resolver()( + table=table, + parse_tree=parse_tree, + data_platform_pair=resolver_enum.get_data_platform_pair(), + reporter=reporter, + ).resolve_to_data_platform_table_list() # type: ignore diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py new file mode 100644 index 0000000000000..a71db5f6b1145 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -0,0 +1,406 @@ +import logging +from abc import ABC, abstractmethod +from typing import Dict, Optional, List, cast, Tuple, Type, Any + +from lark import Tree + +from dataclasses import dataclass +from enum import Enum + +from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport +from datahub.ingestion.source.powerbi.proxy import PowerBiAPI + +from datahub.ingestion.source.powerbi.m_query import tree_function + +LOGGER = logging.getLogger(__name__) + + +@dataclass +class DataPlatformPair: + datahub_data_platform_name: str + powerbi_data_platform_name: str + + +@dataclass +class DataPlatformTable: + name: str + full_name: str + data_platform_pair: DataPlatformPair + + +class 
AbstractMQueryResolver(ABC): + pass + + +class AbstractDataAccessMQueryResolver(AbstractMQueryResolver, ABC): + table: PowerBiAPI.Table + parse_tree: Tree + reporter: PowerBiDashboardSourceReport + data_platform_pair: DataPlatformPair + + def __init__( + self, + table: PowerBiAPI.Table, + parse_tree: Tree, + data_platform_pair: DataPlatformPair, + reporter: PowerBiDashboardSourceReport, + ): + self.table = table + self.parse_tree = parse_tree + self.reporter = reporter + self.data_platform_pair = data_platform_pair + + @abstractmethod + def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: + pass + + +class BaseMQueryResolver(AbstractDataAccessMQueryResolver, ABC): + def get_item_selector_tokens( + self, variable_statement: Tree + ) -> Tuple[Optional[str], Optional[Dict[str, str]]]: + expression_tree: Optional[Tree] = tree_function.first_expression_func(variable_statement) + if expression_tree is None: + LOGGER.debug("Expression tree not found") + LOGGER.debug(variable_statement.pretty()) + return None, None + + item_selector: Optional[Tree] = tree_function.first_item_selector_func(expression_tree) + if item_selector is None: + LOGGER.debug("Item Selector not found in tree") + LOGGER.debug(variable_statement.pretty()) + return None, None + + identifier_tree: Optional[Tree] = tree_function.first_identifier_func(expression_tree) + if identifier_tree is None: + LOGGER.debug("Identifier not found in tree") + LOGGER.debug(variable_statement.pretty()) + return None, None + + # remove whitespaces and quotes from token + tokens: List[str] = tree_function.strip_char_from_list( + tree_function.remove_whitespaces_from_list(tree_function.token_values(cast(Tree, item_selector))), + '"', + ) + identifier: List[str] = tree_function.token_values( + cast(Tree, identifier_tree) + ) # type :ignore + # convert tokens to dict + iterator = iter(tokens) + # cast to satisfy lint + return identifier[0], dict(zip(iterator, iterator)) + + def get_argument_list(self, variable_statement: Tree) -> Optional[List[str]]: + expression_tree: Optional[Tree] = tree_function.first_expression_func(variable_statement) + if expression_tree is None: + LOGGER.debug("First expression rule not found in input tree") + return None + + argument_list: Optional[Tree] = tree_function.first_arg_list_func(expression_tree) + if argument_list is None: + LOGGER.debug("First argument-list rule not found in input tree") + return None + + # remove whitespaces and quotes from token + tokens: List[str] = tree_function.strip_char_from_list( + tree_function.remove_whitespaces_from_list(tree_function.token_values(argument_list)), '"' + ) + return tokens + + def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: + data_platform_tables: List[DataPlatformTable] = [] + # Look for output variable + output_variable: Optional[str] = tree_function.get_output_variable(self.parse_tree) + if output_variable is None: + self.reporter.report_warning( + f"{self.table.full_name}-output-variable", + "output-variable not found in table expression", + ) + return data_platform_tables + + full_table_name: Optional[str] = self.get_full_table_name(output_variable) + if full_table_name is None: + LOGGER.debug( + "Fail to form full_table_name for PowerBI DataSet table %s", + self.table.full_name, + ) + return data_platform_tables + + return [ + DataPlatformTable( + name=full_table_name.split(".")[-1], + full_name=full_table_name, + data_platform_pair=self.data_platform_pair + ), + ] + + @abstractmethod + def get_full_table_name(self, 
output_variable: str) -> Optional[str]: + pass + + +class DefaultTwoStepDataAccessSources(BaseMQueryResolver, ABC): + """ + These are the DataSource for which PowerBI Desktop generates default M-Query of following pattern + let + Source = Sql.Database("localhost", "library"), + dbo_book_issue = Source{[Schema="dbo",Item="book_issue"]}[Data] + in + dbo_book_issue + """ + + def get_full_table_name(self, output_variable: str) -> Optional[str]: + variable_statement: Optional[Tree] = tree_function.get_variable_statement( + self.parse_tree, output_variable + ) + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + f"output variable ({output_variable}) statement not found in table expression", + ) + return None + source, tokens = self.get_item_selector_tokens(cast(Tree, variable_statement)) + if source is None or tokens is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "Schema detail not found in table expression", + ) + return None + + schema_name: str = tokens["Schema"] + table_name: str = tokens["Item"] + # Look for database-name + variable_statement = tree_function.get_variable_statement(self.parse_tree, source) + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-source-statement", + f"source variable {source} statement not found in table expression", + ) + return None + arg_list = self.get_argument_list(cast(Tree, variable_statement)) + if arg_list is None or len(arg_list) < 1: + self.reporter.report_warning( + f"{self.table.full_name}-database-arg-list", + "Expected number of argument not found in data-access function of table expression", + ) + return None + + database_name: str = cast(List[str], arg_list)[1] # 1st token is database name + return cast(Optional[str], f"{database_name}.{schema_name}.{table_name}") + + +class PostgresMQueryResolver(DefaultTwoStepDataAccessSources): + pass + + +class MSSqlMQueryResolver(DefaultTwoStepDataAccessSources): + pass + + +class OracleMQueryResolver(BaseMQueryResolver): + + def _get_db_name(self, value: str) -> Optional[str]: + error_message: str = f"The target argument ({value}) should in the format of :/[.]" + splitter_result: List[str] = value.split("/") + if len(splitter_result) != 2: + self.reporter.report_warning( + f"{self.table.full_name}-oracle-target", error_message + ) + return None + + db_name = splitter_result[1].split(".")[0] + + return db_name + + def get_full_table_name(self, output_variable: str) -> Optional[str]: + # Find step for the output variable + variable_statement: Optional[Tree] = tree_function.get_variable_statement( + self.parse_tree, output_variable + ) + + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + f"output variable ({output_variable}) statement not found in table expression", + ) + return None + + schema_variable, tokens = self.get_item_selector_tokens( + cast(Tree, variable_statement) + ) + if schema_variable is None or tokens is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "table name not found in table expression", + ) + return None + + table_name: str = tokens["Name"] + + # Find step for the schema variable + variable_statement = tree_function.get_variable_statement( + self.parse_tree, cast(str, schema_variable) + ) + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-schema-variable-statement", + f"schema variable 
({schema_variable}) statement not found in table expression", + ) + return None + + source_variable, tokens = self.get_item_selector_tokens(variable_statement) + if source_variable is None or tokens is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "Schema not found in table expression", + ) + return None + + schema_name: str = tokens["Schema"] + + # Find step for the database access variable + variable_statement = tree_function.get_variable_statement(self.parse_tree, source_variable) + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-source-variable-statement", + f"schema variable ({source_variable}) statement not found in table expression", + ) + return None + arg_list = self.get_argument_list(variable_statement) + if arg_list is None or len(arg_list) < 1: + self.reporter.report_warning( + f"{self.table.full_name}-database-arg-list", + "Expected number of argument not found in data-access function of table expression", + ) + return None + # The first argument has database name. format localhost:1521/salesdb.GSLAB.COM + db_name: Optional[str] = self._get_db_name(arg_list[0]) + if db_name is None: + LOGGER.debug(f"Fail to extract db name from the target {arg_list}") + + return f"{db_name}.{schema_name}.{table_name}" + + +class SnowflakeMQueryResolver(BaseMQueryResolver): + + def get_full_table_name(self, output_variable: str) -> Optional[str]: + # Find step for the output variable + variable_statement: Optional[Tree] = tree_function.get_variable_statement( + self.parse_tree, output_variable + ) + + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + f"output variable ({output_variable}) statement not found in table expression", + ) + return None + + schema_variable, tokens = self.get_item_selector_tokens(variable_statement) + if schema_variable is None or tokens is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "table name not found in table expression", + ) + return None + + table_name: str = tokens["Name"] + + # Find step for the schema variable + variable_statement = tree_function.get_variable_statement(self.parse_tree, schema_variable) + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-schema-variable-statement", + f"schema variable ({schema_variable}) statement not found in table expression", + ) + return None + + source_variable, tokens = self.get_item_selector_tokens(variable_statement) + if source_variable is None or tokens is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "schema name not found in table expression", + ) + return None + + schema_name: str = tokens["Name"] + + # Find step for the database access variable + variable_statement = tree_function.get_variable_statement(self.parse_tree, source_variable) + if variable_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-source-variable-statement", + f"schema variable ({source_variable}) statement not found in table expression", + ) + return None + _, tokens = self.get_item_selector_tokens(variable_statement) + if tokens is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "database name not found in table expression", + ) + return None + + db_name: str = tokens["Name"] + + return f"{db_name}.{schema_name}.{table_name}" + + +class SupportedDataPlatform(Enum): + POSTGRES_SQL = ( + DataPlatformPair( + 
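
For the two-step pattern quoted in the DefaultTwoStepDataAccessSources docstring, the pieces combine as in the worked example below. The values come from that docstring's M snippet; the variable names here are illustrative only, not the module's API.

    # Two-step pattern: one step opens the database, the next selects the table.
    #   let
    #       Source = Sql.Database("localhost", "library"),
    #       dbo_book_issue = Source{[Schema="dbo",Item="book_issue"]}[Data]
    #   in
    #       dbo_book_issue
    #
    # get_item_selector_tokens() flattens the item selector into tokens and then
    # pairs them up into a dict:
    tokens = ["Schema", "dbo", "Item", "book_issue"]
    it = iter(tokens)
    selector = dict(zip(it, it))        # {"Schema": "dbo", "Item": "book_issue"}

    # get_argument_list() yields the arguments of Sql.Database(); the second one
    # is the database name, which completes the three-part table name:
    argument_list = ["localhost", "library"]
    full_table_name = f"{argument_list[1]}.{selector['Schema']}.{selector['Item']}"
    print(full_table_name)              # library.dbo.book_issue
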
powerbi_data_platform_name="PostgreSQL", + datahub_data_platform_name="postgres" + ), + PostgresMQueryResolver + ) + ORACLE = ( + DataPlatformPair( + powerbi_data_platform_name="Oracle", + datahub_data_platform_name="oracle" + ), + OracleMQueryResolver + ) + SNOWFLAKE = ( + DataPlatformPair( + powerbi_data_platform_name="Snowflake", + datahub_data_platform_name="snowflake" + ), + SnowflakeMQueryResolver + ) + MS_SQL = ( + DataPlatformPair( + powerbi_data_platform_name="Sql", + datahub_data_platform_name="mssql" + ), + MSSqlMQueryResolver + ) + + def get_data_platform_pair(self) -> DataPlatformPair: + return self.value[0] + + def get_m_query_resolver(self) -> Type[BaseMQueryResolver]: + return self.value[1] + + +def get_resolver(parse_tree: Tree) -> Optional[SupportedDataPlatform]: + + _filter: Any = parse_tree.find_data("invoke_expression") + + letter_tree: Tree = next(_filter).children[0] + data_access_func: str = tree_function.make_function_name(letter_tree) + + LOGGER.debug( + "Looking for data-access(%s) resolver", + data_access_func, + ) + + # Take platform name from data_access_func variable + platform_name: str = data_access_func.split(".")[0] + for platform in SupportedDataPlatform: + if platform.get_data_platform_pair().powerbi_data_platform_name == platform_name: + return platform + + LOGGER.info("M-Query resolver not found for data access function %s", data_access_func) + + return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py new file mode 100644 index 0000000000000..91c9550903bd8 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py @@ -0,0 +1,143 @@ +import logging +from typing import Optional, List, Union, cast, Any + +from functools import partial + +from lark import Token, Tree + +LOGGER = logging.getLogger(__name__) + + +def get_output_variable(root: Tree) -> Optional[str]: + in_expression_tree: Optional[Tree] = get_first_rule(root, "in_expression") + if in_expression_tree is None: + return None + # Get list of terminal value + # Remove any whitespaces + # Remove any spaces + return "".join( + strip_char_from_list( + remove_whitespaces_from_list(token_values(in_expression_tree)), " " + ) + ) + + +def get_variable_statement(parse_tree: Tree, variable: str) -> Optional[Tree]: + _filter = parse_tree.find_data("variable") + # filter will return statement of the form = + # We are searching for Tree where variable-name is matching with provided variable + for tree in _filter: + values: List[str] = token_values(tree.children[0]) + actual_value: str = "".join(strip_char_from_list(values, " ")) + LOGGER.debug("Actual Value = %s", actual_value) + LOGGER.debug("Expected Value = %s", variable) + + if actual_value == variable: + return tree + + LOGGER.info("Provided variable(%s) not found in variable rule", variable) + + return None + + +def get_first_rule(tree: Tree, rule: str) -> Optional[Tree]: + """ + Lark library doesn't have advance search function. 
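
A minimal, self-contained illustration of this kind of recursive rule search follows. It uses a hand-built lark Tree and hypothetical helper names; the bundled powerbi-lexical-grammar.rule is not reproduced here.

    from typing import Optional, Union
    from lark import Token, Tree

    # Hand-built stand-in for a parsed "let Source = ..., Output = ... in Output"
    tree = Tree("let_expression", [
        Tree("variable", [Tree("identifier", [Token("NAME", "Source")])]),
        Tree("variable", [Tree("identifier", [Token("NAME", "Output")])]),
        Tree("in_expression", [Token("NAME", "Output")]),
    ])

    def first_rule(node: Union[Tree, Token], rule: str) -> Optional[Tree]:
        # Same shape as tree_function.get_first_rule(): depth-first search for
        # the first subtree whose rule name matches.
        if isinstance(node, Token):
            return None
        if node.data == rule:
            return node
        for child in node.children:
            found = first_rule(child, rule)
            if found is not None:
                return found
        return None

    in_expr = first_rule(tree, "in_expression")
    assert in_expr is not None
    # token_values() plus whitespace stripping boils the subtree down to its leaf text:
    output_variable = "".join(tok.value for tok in in_expr.scan_values(lambda t: isinstance(t, Token)))
    print(output_variable)  # Output
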
+ This function will return the first tree of provided rule + :param tree: Tree to search for the expression rule + :return: Tree + """ + + def internal(node: Union[Tree, Token]) -> Optional[Tree]: + if isinstance(node, Tree) and node.data == rule: + return node + if isinstance(node, Token): + return None + + for child in cast(Tree, node).children: + child_node: Optional[Tree] = internal(child) + if child_node is not None: + return child_node + + return None + + expression_tree: Optional[Tree] = internal(tree) + + return expression_tree + + +def token_values(tree: Tree) -> List[str]: + """ + + :param tree: Tree to traverse + :return: List of leaf token data + """ + values: List[str] = [] + + def internal(node: Union[Tree, Token]) -> None: + if isinstance(node, Token): + values.append(cast(Token, node).value) + return + + for child in node.children: + internal(child) + + internal(tree) + + return values + + +def remove_whitespaces_from_list(values: List[str]) -> List[str]: + result: List[str] = [] + for item in values: + if item.strip() not in ("", "\n", "\t"): + result.append(item) + + return result + + +def strip_char_from_list(values: List[str], char: str) -> List[str]: + result: List[str] = [] + for item in values: + result.append(item.strip(char)) + + return result + + +def make_function_name(tree: Tree) -> str: + values: List[str] = token_values(tree) + return ".".join(values) + + +def get_all_function_name(tree: Tree) -> List[str]: + """ + Returns all function name present in input tree + :param tree: Input lexical tree + :return: list of function name + """ + functions: List[str] = [] + + # List the all invoke_expression in the Tree + _filter: Any = tree.find_data("invoke_expression") + + for node in _filter: + LOGGER.debug("Tree = %s", node.pretty()) + primary_expression_node: Optional[Tree] = first_primary_expression_func(node) + if primary_expression_node is None: + continue + + identifier_node: Optional[Tree] = first_identifier_func(primary_expression_node) + if identifier_node is None: + continue + + functions.append(make_function_name(identifier_node)) + + return functions + + +first_expression_func = partial(get_first_rule, rule="expression") +first_item_selector_func = partial(get_first_rule, rule="item_selector") +first_arg_list_func = partial(get_first_rule, rule="argument_list") +first_identifier_func = partial(get_first_rule, rule="identifier") +first_primary_expression_func = partial(get_first_rule, rule="primary_expression") +first_identifier_func = partial(get_first_rule, rule="identifier") diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py new file mode 100644 index 0000000000000..9f3664bfb5f41 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py @@ -0,0 +1,43 @@ +import logging + +from datahub.ingestion.source.powerbi.m_query import tree_function + +from typing import List, Tuple, Optional +from lark import Tree + +LOGGER = logging.getLogger(__name__) + + +def any_one_should_present(supported_funcs: List[str], functions: List[str]) -> Tuple[bool, Optional[str]]: + """ + Anyone functions from supported_funcs should present in functions list + :param supported_funcs: List of function m_query module supports + :param functions: List of functions retrieved from expression + :return: True or False + """ + for f in supported_funcs: + if f in functions: + return True, None + + return False, f"Function from 
supported function list {supported_funcs} not found" + + +def all_function_should_be_known(supported_funcs: List[str], functions: List[str]) -> Tuple[bool, Optional[str]]: + for f in functions: + if f not in supported_funcs: + return False, f"Function {f} is unknown" + + return True, None + + +def validate_parse_tree(supported_funcs: List[str], tree: Tree) -> Tuple[bool, str]: + """ + :param supported_funcs: List of supported functions + :param tree: tree to validate as per functions supported by m_parser module + :return: first argument is False if validation is failed and second argument would contain the error message. + in-case of valid tree the first argument is True and second argument would be None. + """ + functions: List[str] = tree_function.get_all_function_name(tree) + if len(functions) == 0: + return False, "Function call not found" + diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index f172cc37cc721..b573956b0fd7b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -21,14 +21,15 @@ ) from datahub.ingestion.api.source import Source, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.powerbi import m_parser from datahub.ingestion.source.powerbi.config import ( Constant, PlatformDetail, PowerBiDashboardSourceConfig, PowerBiDashboardSourceReport, ) -from datahub.ingestion.source.powerbi.m_parser import DataPlatformTable + +from datahub.ingestion.source.powerbi.m_query import resolver +from datahub.ingestion.source.powerbi.m_query import parser from datahub.ingestion.source.powerbi.proxy import PowerBiAPI from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps from datahub.metadata.schema_classes import ( @@ -161,18 +162,17 @@ def __to_datahub_dataset( if self.__config.extract_lineage is True: # Check if upstreams table is available, parse them and create dataset URN for each upstream table upstreams: List[UpstreamClass] = [] - upstream_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( + upstream_tables: List[resolver.DataPlatformTable] = parser.get_upstream_tables( table, self.__reporter ) for upstream_table in upstream_tables: + if upstream_table.data_platform_pair.powerbi_data_platform_name not in self.__config.dataset_type_mapping[upstream_table.platform_type]: + continue + platform: Union[ str, PlatformDetail ] = self.__config.dataset_type_mapping[upstream_table.platform_type] - platform_name: str = ( - m_parser.POWERBI_TO_DATAHUB_DATA_PLATFORM_MAPPING[ - upstream_table.platform_type - ] - ) + platform_name: str = upstream_table.data_platform_pair.datahub_data_platform_name platform_instance_name: Optional[str] = None platform_env: str = DEFAULT_ENV # Determine if PlatformDetail is provided @@ -730,12 +730,21 @@ def create(cls, config_dict, ctx): config = PowerBiDashboardSourceConfig.parse_obj(config_dict) return cls(config, ctx) + def validate_dataset_type_mapping(self): + powerbi_data_platforms: List[str] = [data_platform.get_data_platform_pair().powerbi_data_platform_name for data_platform + in resolver.SupportedDataPlatform] + + for key in self.source_config.keys(): + if key not in powerbi_data_platforms: + raise ValueError(f"PowerBI DataPlatform {key} is not supported") + def get_workunits(self) -> Iterable[MetadataWorkUnit]: """ Datahub Ingestion framework invoke this method """ 
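
The new validate_dataset_type_mapping() hook boils down to the check sketched below. The supported PowerBI platform names are written out literally here (in the patch they come from resolver.SupportedDataPlatform); the helper name is illustrative only.

    # Keys of dataset_type_mapping must be PowerBI platform names the
    # m_query resolvers understand; anything else is rejected up front.
    SUPPORTED_POWERBI_PLATFORMS = {"PostgreSQL", "Oracle", "Snowflake", "Sql"}

    def check_dataset_type_mapping(dataset_type_mapping: dict) -> None:
        for key in dataset_type_mapping:
            if key not in SUPPORTED_POWERBI_PLATFORMS:
                raise ValueError(f"PowerBI DataPlatform {key} is not supported")

    check_dataset_type_mapping({"PostgreSQL": "postgres", "Sql": "mssql"})  # passes
    # check_dataset_type_mapping({"MySql": "mysql"})  # -> ValueError
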
LOGGER.info("PowerBi plugin execution is started") - + # Validate dataset type mapping + self.validate_dataset_type_mapping() # Fetch PowerBi workspace for given workspace identifier workspace = self.powerbi_client.get_workspace( self.source_config.workspace_id, self.reporter diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 917532b783976..ca9bcb5f7fad5 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -2,9 +2,12 @@ from lark import Tree -from datahub.ingestion.source.powerbi import m_parser +from datahub.ingestion.source.powerbi.m_query import ( + parser, + tree_function +) from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport -from datahub.ingestion.source.powerbi.m_parser import ( +from datahub.ingestion.source.powerbi.m_query.resolver import ( DataPlatformTable, SupportedDataPlatform, ) @@ -30,171 +33,169 @@ ] -# def test_parse_m_query1(): -# expression: str = M_QUERIES[0] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == "TESTTABLE_Table" -# -# -# def test_parse_m_query2(): -# expression: str = M_QUERIES[1] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Added Custom2"' -# -# -# def test_parse_m_query3(): -# expression: str = M_QUERIES[2] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Added Conditional Column"' -# -# -# def test_parse_m_query4(): -# expression: str = M_QUERIES[3] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Changed Type"' -# -# -# def test_parse_m_query5(): -# expression: str = M_QUERIES[4] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Renamed Columns"' -# -# -# def test_parse_m_query6(): -# expression: str = M_QUERIES[5] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Added Custom"' -# -# -# def test_parse_m_query7(): -# expression: str = M_QUERIES[6] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == "Source" -# -# -# def test_parse_m_query8(): -# expression: str = M_QUERIES[7] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Added Custom1"' -# -# -# def test_parse_m_query9(): -# expression: str = M_QUERIES[8] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Added Custom1"' -# -# -# def test_parse_m_query10(): -# expression: str = M_QUERIES[9] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Changed Type1"' -# -# -# def test_parse_m_query11(): -# expression: str = M_QUERIES[10] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == "Source" -# -# -# def test_parse_m_query12(): -# expression: str = M_QUERIES[11] -# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == '"Added Custom"' -# -# -# def test_parse_m_query13(): -# expression: str = M_QUERIES[12] 
-# parse_tree: Tree = m_parser._parse_expression(expression) -# assert m_parser._get_output_variable(parse_tree) == "two_source_table" -# -# -# def test_snowflake_regular_case(): -# q: str = M_QUERIES[0] -# table: PowerBiAPI.Table = PowerBiAPI.Table( -# expression=q, -# name="virtual_order_table", -# full_name="OrderDataSet.virtual_order_table", -# ) -# -# reporter = PowerBiDashboardSourceReport() -# data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( -# table, reporter -# ) -# -# assert len(data_platform_tables) == 1 -# assert data_platform_tables[0].name == "TESTTABLE" -# assert data_platform_tables[0].full_name == "PBI_TEST.TEST.TESTTABLE" -# assert ( -# data_platform_tables[0].platform_type == SupportedDataPlatform.SNOWFLAKE.value -# ) -# -# -# def test_postgres_regular_case(): -# q: str = M_QUERIES[13] -# table: PowerBiAPI.Table = PowerBiAPI.Table( -# expression=q, -# name="virtual_order_table", -# full_name="OrderDataSet.virtual_order_table", -# ) -# -# reporter = PowerBiDashboardSourceReport() -# data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( -# table, reporter -# ) -# -# assert len(data_platform_tables) == 1 -# assert data_platform_tables[0].name == "order_date" -# assert data_platform_tables[0].full_name == "mics.public.order_date" -# assert ( -# data_platform_tables[0].platform_type -# == SupportedDataPlatform.POSTGRES_SQL.value -# ) -# -# -# def test_oracle_regular_case(): -# q: str = M_QUERIES[14] -# table: PowerBiAPI.Table = PowerBiAPI.Table( -# expression=q, -# name="virtual_order_table", -# full_name="OrderDataSet.virtual_order_table", -# ) -# -# reporter = PowerBiDashboardSourceReport() -# data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( -# table, reporter -# ) -# -# assert len(data_platform_tables) == 1 -# assert data_platform_tables[0].name == "EMPLOYEES" -# assert data_platform_tables[0].full_name == "salesdb.HR.EMPLOYEES" -# assert data_platform_tables[0].platform_type == SupportedDataPlatform.ORACLE.value -# -# -# def test_mssql_regular_case(): -# q: str = M_QUERIES[15] -# table: PowerBiAPI.Table = PowerBiAPI.Table( -# expression=q, -# name="virtual_order_table", -# full_name="OrderDataSet.virtual_order_table", -# ) -# -# reporter = PowerBiDashboardSourceReport() -# -# data_platform_tables: List[DataPlatformTable] = m_parser.get_upstream_tables( -# table, reporter -# ) -# -# assert len(data_platform_tables) == 1 -# assert data_platform_tables[0].name == "book_issue" -# assert data_platform_tables[0].full_name == "library.dbo.book_issue" -# assert data_platform_tables[0].platform_type == SupportedDataPlatform.MS_SQL.value - -def test_advance_use_case(): +def test_parse_m_query1(): + expression: str = M_QUERIES[0] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == "TESTTABLE_Table" + + +def test_parse_m_query2(): + expression: str = M_QUERIES[1] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Added Custom2"' + + +def test_parse_m_query3(): + expression: str = M_QUERIES[2] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Added Conditional Column"' + + +def test_parse_m_query4(): + expression: str = M_QUERIES[3] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Changed Type"' + + +def test_parse_m_query5(): + expression: 
str = M_QUERIES[4] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Renamed Columns"' + + +def test_parse_m_query6(): + expression: str = M_QUERIES[5] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Added Custom"' + + +def test_parse_m_query7(): + expression: str = M_QUERIES[6] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == "Source" + + +def test_parse_m_query8(): + expression: str = M_QUERIES[7] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Added Custom1"' + +def test_parse_m_query9(): + expression: str = M_QUERIES[8] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Added Custom1"' + + +def test_parse_m_query10(): + expression: str = M_QUERIES[9] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Changed Type1"' + + +def test_parse_m_query11(): + expression: str = M_QUERIES[10] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == "Source" + + +def test_parse_m_query12(): + expression: str = M_QUERIES[11] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Added Custom"' + + +def test_parse_m_query13(): + expression: str = M_QUERIES[12] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == "two_source_table" + + +def test_snowflake_regular_case(): + q: str = M_QUERIES[0] table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=M_QUERIES[1], + expression=q, name="virtual_order_table", full_name="OrderDataSet.virtual_order_table", ) - m_parser.get_upstream_tables(table, PowerBiDashboardSourceReport()) + + reporter = PowerBiDashboardSourceReport() + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, reporter + ) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == "TESTTABLE" + assert data_platform_tables[0].full_name == "PBI_TEST.TEST.TESTTABLE" + assert ( + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.SNOWFLAKE.get_data_platform_pair().powerbi_data_platform_name + ) + + +def test_postgres_regular_case(): + q: str = M_QUERIES[13] + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=q, + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, reporter + ) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == "order_date" + assert data_platform_tables[0].full_name == "mics.public.order_date" + assert ( + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.POSTGRES_SQL.get_data_platform_pair().powerbi_data_platform_name + ) + + +def test_oracle_regular_case(): + q: str = M_QUERIES[14] + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=q, + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + 
table, reporter + ) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == "EMPLOYEES" + assert data_platform_tables[0].full_name == "salesdb.HR.EMPLOYEES" + assert ( + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.ORACLE.get_data_platform_pair().powerbi_data_platform_name + ) + + +def test_mssql_regular_case(): + q: str = M_QUERIES[15] + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=q, + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, reporter + ) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == "book_issue" + assert data_platform_tables[0].full_name == "library.dbo.book_issue" + assert ( + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.MS_SQL.get_data_platform_pair().powerbi_data_platform_name + ) From 776a78760158c7df21c419e8a09b7485db6bffe9 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Thu, 15 Dec 2022 21:22:00 +0530 Subject: [PATCH 24/53] text fixes --- .../source/powerbi/m_query/parser.py | 13 ++++++++- .../source/powerbi/m_query/resolver.py | 27 ++++++++++++++----- .../source/powerbi/m_query/validator.py | 17 +++++++++--- .../integration/powerbi/test_m_parser.py | 16 +++++++++++ 4 files changed, 62 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py index aa929b475529a..341a9a215dfcb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py @@ -6,6 +6,7 @@ from lark import Lark, Tree from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport + from datahub.ingestion.source.powerbi.proxy import PowerBiAPI from datahub.ingestion.source.powerbi.m_query import validator from datahub.ingestion.source.powerbi.m_query import resolver @@ -34,7 +35,9 @@ def _parse_expression(expression: str) -> Tree: def get_upstream_tables( - table: PowerBiAPI.Table, reporter: PowerBiDashboardSourceReport + table: PowerBiAPI.Table, + reporter: PowerBiDashboardSourceReport, + native_query_enabled: bool = True, ) -> List[resolver.DataPlatformTable]: if table.expression is None: reporter.report_warning(table.full_name, "Expression is none") @@ -42,6 +45,14 @@ def get_upstream_tables( try: parse_tree: Tree = _parse_expression(table.expression) + valid, message = validator.validate_parse_tree(parse_tree, native_query_enabled=native_query_enabled) + if valid is False: + LOGGER.debug("Validation failed: %s", message) + reporter.report_warning( + table.full_name, + message + ) + return [] except lark.exceptions.UnexpectedCharacters as e: LOGGER.debug(f"Fail to parse expression {table.expression}", exc_info=e) reporter.report_warning( diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index a71db5f6b1145..bb20b191689b0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -346,34 +346,46 @@ def get_full_table_name(self, output_variable: str) -> Optional[str]: return 
f"{db_name}.{schema_name}.{table_name}" +class FunctionName(Enum): + NATIVE_QUERY = "Value.NativeQuery" + POSTGRESQL_DATA_ACCESS = "PostgreSQL.Database" + ORACLE_DATA_ACCESS = "Oracle.Database" + SNOWFLAKE_DATA_ACCESS = "Snowflake.Databases" + MSSQL_DATA_ACCESS = "Sql.Database" + + class SupportedDataPlatform(Enum): POSTGRES_SQL = ( DataPlatformPair( powerbi_data_platform_name="PostgreSQL", datahub_data_platform_name="postgres" ), - PostgresMQueryResolver + PostgresMQueryResolver, + FunctionName.POSTGRESQL_DATA_ACCESS, ) ORACLE = ( DataPlatformPair( powerbi_data_platform_name="Oracle", datahub_data_platform_name="oracle" ), - OracleMQueryResolver + OracleMQueryResolver, + FunctionName.ORACLE_DATA_ACCESS, ) SNOWFLAKE = ( DataPlatformPair( powerbi_data_platform_name="Snowflake", datahub_data_platform_name="snowflake" ), - SnowflakeMQueryResolver + SnowflakeMQueryResolver, + FunctionName.SNOWFLAKE_DATA_ACCESS, ) MS_SQL = ( DataPlatformPair( powerbi_data_platform_name="Sql", datahub_data_platform_name="mssql" ), - MSSqlMQueryResolver + MSSqlMQueryResolver, + FunctionName.MSSQL_DATA_ACCESS, ) def get_data_platform_pair(self) -> DataPlatformPair: @@ -382,6 +394,9 @@ def get_data_platform_pair(self) -> DataPlatformPair: def get_m_query_resolver(self) -> Type[BaseMQueryResolver]: return self.value[1] + def get_function_name(self) -> FunctionName: + return self.value[2] + def get_resolver(parse_tree: Tree) -> Optional[SupportedDataPlatform]: @@ -395,10 +410,8 @@ def get_resolver(parse_tree: Tree) -> Optional[SupportedDataPlatform]: data_access_func, ) - # Take platform name from data_access_func variable - platform_name: str = data_access_func.split(".")[0] for platform in SupportedDataPlatform: - if platform.get_data_platform_pair().powerbi_data_platform_name == platform_name: + if platform.get_function_name().value == data_access_func: return platform LOGGER.info("M-Query resolver not found for data access function %s", data_access_func) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py index 9f3664bfb5f41..3941e4ed38ed5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py @@ -1,8 +1,9 @@ import logging from datahub.ingestion.source.powerbi.m_query import tree_function +from datahub.ingestion.source.powerbi.m_query import resolver -from typing import List, Tuple, Optional +from typing import List, Tuple, Optional, Set from lark import Tree LOGGER = logging.getLogger(__name__) @@ -30,10 +31,10 @@ def all_function_should_be_known(supported_funcs: List[str], functions: List[str return True, None -def validate_parse_tree(supported_funcs: List[str], tree: Tree) -> Tuple[bool, str]: +def validate_parse_tree(tree: Tree, native_query_enabled: bool = True) -> Tuple[bool, str]: """ - :param supported_funcs: List of supported functions :param tree: tree to validate as per functions supported by m_parser module + :param native_query_enabled: Whether user want to extract lineage from native query :return: first argument is False if validation is failed and second argument would contain the error message. in-case of valid tree the first argument is True and second argument would be None. 
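
The body that follows implements the rule sketched below: exactly one known data-access function must appear in the expression, and Value.NativeQuery is rejected when native-query lineage is disabled. This is a condensed, runnable sketch with illustrative names, not the module's API.

    from typing import List, Optional, Tuple

    DATA_ACCESS_FUNCTIONS = {
        "PostgreSQL.Database",
        "Oracle.Database",
        "Snowflake.Databases",
        "Sql.Database",
    }
    NATIVE_QUERY_FUNCTION = "Value.NativeQuery"

    def validate(functions: List[str], native_query_enabled: bool = True) -> Tuple[bool, Optional[str]]:
        if not functions:
            return False, "Function call not found"
        found = DATA_ACCESS_FUNCTIONS & set(functions)
        if len(found) != 1:
            return False, f"Expected exactly one data-access function, found: {sorted(found)}"
        if not native_query_enabled and NATIVE_QUERY_FUNCTION in functions:
            return False, "Lineage extraction from native query is disabled."
        return True, None

    print(validate(["Sql.Database", "Table.TransformColumnTypes"]))       # (True, None)
    print(validate(["Value.NativeQuery", "Snowflake.Databases"], False))  # (False, '... disabled.')
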
""" @@ -41,3 +42,13 @@ def validate_parse_tree(supported_funcs: List[str], tree: Tree) -> Tuple[bool, s if len(functions) == 0: return False, "Function call not found" + data_access_function_names: List[str] = [x.get_function_name().value for x in resolver.SupportedDataPlatform] + result: Set[str] = set(data_access_function_names) & set(functions) + if len(result) != 1: + return False, f"More than one data-access functions are found in expression. Functions = {result}" + + if native_query_enabled is False: + if resolver.FunctionName.NATIVE_QUERY.value in functions: + return False, f"Lineage extraction from native query is disabled." + + return True, None diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index ca9bcb5f7fad5..7a51006e1e46f 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -120,6 +120,7 @@ def test_snowflake_regular_case(): ) reporter = PowerBiDashboardSourceReport() + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( table, reporter ) @@ -199,3 +200,18 @@ def test_mssql_regular_case(): == SupportedDataPlatform.MS_SQL.get_data_platform_pair().powerbi_data_platform_name ) + +def test_native_query_disabled(): + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=M_QUERIES[1], + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, reporter, native_query_enabled=False + ) + + assert len(data_platform_tables) == 0 From 0a4a9b0eeef9f88a68281ed91cd174810299a3c2 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Sun, 18 Dec 2022 22:13:25 +0530 Subject: [PATCH 25/53] WIP --- .../ingestion/source/powerbi/m_parser2.py | 580 ------------------ .../source/powerbi/m_query/parser.py | 13 +- .../source/powerbi/m_query/resolver.py | 252 +++++--- .../source/powerbi/m_query/tree_function.py | 6 +- .../integration/powerbi/test_m_parser.py | 353 ++++++----- 5 files changed, 367 insertions(+), 837 deletions(-) delete mode 100644 metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser2.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser2.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser2.py deleted file mode 100644 index 2979e181ca248..0000000000000 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_parser2.py +++ /dev/null @@ -1,580 +0,0 @@ -import importlib.resources as pkg_resource -import logging -from abc import ABC, abstractmethod -from dataclasses import dataclass -from enum import Enum -from functools import partial -from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast - -import lark -from lark import Lark, Token, Tree - -from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport -from datahub.ingestion.source.powerbi.proxy import PowerBiAPI - -LOGGER = logging.getLogger(__name__) - - -@dataclass -class DataPlatformTable: - name: str - full_name: str - platform_type: str - - -class SupportedDataPlatform(Enum): - POSTGRES_SQL = "PostgreSQL" - ORACLE = "Oracle" - SNOWFLAKE = "Snowflake" - MS_SQL = "Sql" - - -POWERBI_TO_DATAHUB_DATA_PLATFORM_MAPPING: Dict[str, str] = { - SupportedDataPlatform.POSTGRES_SQL.value: "postgres", - SupportedDataPlatform.ORACLE.value: "oracle", - 
SupportedDataPlatform.SNOWFLAKE.value: "snowflake", -} - - -def _get_output_variable(root: Tree) -> Optional[str]: - in_expression_tree: Optional[Tree] = _get_first_rule(root, "in_expression") - if in_expression_tree is None: - return None - # Get list of terminal value - # Remove any whitespaces - # Remove any spaces - return "".join( - _strip_char_from_list( - _remove_whitespaces_from_list(_token_values(in_expression_tree)), " " - ) - ) - - -def _get_variable_statement(parse_tree: Tree, variable: str) -> Optional[Tree]: - _filter = parse_tree.find_data("variable") - # filter will return statement of the form = - # We are searching for Tree where variable-name is matching with provided variable - for tree in _filter: - values: List[str] = _token_values(tree.children[0]) - actual_value: str = "".join(_strip_char_from_list(values, " ")) - LOGGER.debug("Actual Value = %s", actual_value) - LOGGER.debug("Expected Value = %s", variable) - - if actual_value == variable: - return tree - - LOGGER.info("Provided variable(%s) not found in variable rule", variable) - - return None - - -def _get_first_rule(tree: Tree, rule: str) -> Optional[Tree]: - """ - Lark library doesn't have advance search function. - This function will return the first tree of provided rule - :param tree: Tree to search for the expression rule - :return: Tree - """ - - def internal(node: Union[Tree, Token]) -> Optional[Tree]: - if isinstance(node, Tree) and node.data == rule: - return node - if isinstance(node, Token): - return None - - for child in cast(Tree, node).children: - child_node: Optional[Tree] = internal(child) - if child_node is not None: - return child_node - - return None - - expression_tree: Optional[Tree] = internal(tree) - - return expression_tree - - -def _token_values(tree: Tree) -> List[str]: - """ - - :param tree: Tree to traverse - :return: List of leaf token data - """ - values: List[str] = [] - - def internal(node: Union[Tree, Token]) -> None: - if isinstance(node, Token): - values.append(cast(Token, node).value) - return - - for child in node.children: - internal(child) - - internal(tree) - - return values - - -def _remove_whitespaces_from_list(values: List[str]) -> List[str]: - result: List[str] = [] - for item in values: - if item.strip() not in ("", "\n", "\t"): - result.append(item) - - return result - - -def _strip_char_from_list(values: List[str], char: str) -> List[str]: - result: List[str] = [] - for item in values: - result.append(item.strip(char)) - - return result - - -def _make_function_name(tree: Tree) -> str: - values: List[str] = _token_values(tree) - return ".".join(values) - - -class AbstractMQueryResolver(ABC): - pass - - -class AbstractDataAccessMQueryResolver(AbstractMQueryResolver, ABC): - table: PowerBiAPI.Table - parse_tree: Tree - reporter: PowerBiDashboardSourceReport - - def __init__( - self, - table: PowerBiAPI.Table, - parse_tree: Tree, - reporter: PowerBiDashboardSourceReport, - ): - self.table = table - self.parse_tree = parse_tree - self.reporter = reporter - self.first_expression_func = partial(_get_first_rule, rule="expression") - self.first_item_selector_func = partial(_get_first_rule, rule="item_selector") - self.first_arg_list_func = partial(_get_first_rule, rule="argument_list") - self.first_identifier_func = partial(_get_first_rule, rule="identifier") - - @abstractmethod - def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: - pass - - -class BaseMQueryResolver(AbstractDataAccessMQueryResolver, ABC): - def get_item_selector_tokens( - self, 
variable_statement: Tree - ) -> Tuple[Optional[str], Optional[Dict[str, str]]]: - expression_tree: Optional[Tree] = self.first_expression_func(variable_statement) - if expression_tree is None: - LOGGER.debug("Expression tree not found") - LOGGER.debug(variable_statement.pretty()) - return None, None - - item_selector: Optional[Tree] = self.first_item_selector_func(expression_tree) - if item_selector is None: - LOGGER.debug("Item Selector not found in tree") - LOGGER.debug(variable_statement.pretty()) - return None, None - - identifier_tree: Optional[Tree] = self.first_identifier_func(expression_tree) - if identifier_tree is None: - LOGGER.debug("Identifier not found in tree") - LOGGER.debug(variable_statement.pretty()) - return None, None - - # remove whitespaces and quotes from token - tokens: List[str] = _strip_char_from_list( - _remove_whitespaces_from_list(_token_values(cast(Tree, item_selector))), - '"', - ) - identifier: List[str] = _token_values( - cast(Tree, identifier_tree) - ) # type :ignore - # convert tokens to dict - iterator = iter(tokens) - # cast to satisfy lint - return identifier[0], dict(zip(iterator, iterator)) - - def get_argument_list(self, variable_statement: Tree) -> Optional[List[str]]: - expression_tree: Optional[Tree] = self.first_expression_func(variable_statement) - if expression_tree is None: - LOGGER.debug("First expression rule not found in input tree") - return None - - argument_list: Optional[Tree] = self.first_arg_list_func(expression_tree) - if argument_list is None: - LOGGER.debug("First argument-list rule not found in input tree") - return None - - # remove whitespaces and quotes from token - tokens: List[str] = _strip_char_from_list( - _remove_whitespaces_from_list(_token_values(argument_list)), '"' - ) - return tokens - - def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: - data_platform_tables: List[DataPlatformTable] = [] - # Look for output variable - output_variable: Optional[str] = _get_output_variable(self.parse_tree) - if output_variable is None: - self.reporter.report_warning( - f"{self.table.full_name}-output-variable", - "output-variable not found in table expression", - ) - return data_platform_tables - - full_table_name: Optional[str] = self.get_full_table_name(output_variable) - if full_table_name is None: - LOGGER.debug( - "Fail to form full_table_name for PowerBI DataSet table %s", - self.table.full_name, - ) - return data_platform_tables - - return [ - DataPlatformTable( - name=full_table_name.split(".")[-1], - full_name=full_table_name, - platform_type=self.get_platform(), - ), - ] - - @abstractmethod - def get_platform(self) -> str: - pass - - @abstractmethod - def get_full_table_name(self, output_variable: str) -> Optional[str]: - pass - - -class DefaultTwoStepDataAccessSources(BaseMQueryResolver, ABC): - """ - These are the DataSource for which PowerBI Desktop generates default M-Query of following pattern - let - Source = Sql.Database("localhost", "library"), - dbo_book_issue = Source{[Schema="dbo",Item="book_issue"]}[Data] - in - dbo_book_issue - """ - - def get_full_table_name(self, output_variable: str) -> Optional[str]: - variable_statement: Optional[Tree] = _get_variable_statement( - self.parse_tree, output_variable - ) - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - f"output variable ({output_variable}) statement not found in table expression", - ) - return None - source, tokens = self.get_item_selector_tokens(cast(Tree, 
variable_statement)) - if source is None or tokens is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "Schema detail not found in table expression", - ) - return None - - schema_name: str = tokens["Schema"] - table_name: str = tokens["Item"] - # Look for database-name - variable_statement = _get_variable_statement(self.parse_tree, source) - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-source-statement", - f"source variable {source} statement not found in table expression", - ) - return None - arg_list = self.get_argument_list(cast(Tree, variable_statement)) - if arg_list is None or len(arg_list) < 1: - self.reporter.report_warning( - f"{self.table.full_name}-database-arg-list", - "Expected number of argument not found in data-access function of table expression", - ) - return None - - database_name: str = cast(List[str], arg_list)[1] # 1st token is database name - return cast(Optional[str], f"{database_name}.{schema_name}.{table_name}") - - -class PostgresMQueryResolver(DefaultTwoStepDataAccessSources): - def get_platform(self) -> str: - return SupportedDataPlatform.POSTGRES_SQL.value - - -class MSSqlMQueryResolver(DefaultTwoStepDataAccessSources): - def get_platform(self) -> str: - return SupportedDataPlatform.MS_SQL.value - - -class OracleMQueryResolver(BaseMQueryResolver): - def get_platform(self) -> str: - return SupportedDataPlatform.ORACLE.value - - def _get_db_name(self, value: str) -> Optional[str]: - error_message: str = f"The target argument ({value}) should in the format of :/[.]" - splitter_result: List[str] = value.split("/") - if len(splitter_result) != 2: - self.reporter.report_warning( - f"{self.table.full_name}-oracle-target", error_message - ) - return None - - db_name = splitter_result[1].split(".")[0] - - return db_name - - def get_full_table_name(self, output_variable: str) -> Optional[str]: - # Find step for the output variable - variable_statement: Optional[Tree] = _get_variable_statement( - self.parse_tree, output_variable - ) - - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - f"output variable ({output_variable}) statement not found in table expression", - ) - return None - - schema_variable, tokens = self.get_item_selector_tokens( - cast(Tree, variable_statement) - ) - if schema_variable is None or tokens is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "table name not found in table expression", - ) - return None - - table_name: str = tokens["Name"] - - # Find step for the schema variable - variable_statement = _get_variable_statement( - self.parse_tree, cast(str, schema_variable) - ) - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-schema-variable-statement", - f"schema variable ({schema_variable}) statement not found in table expression", - ) - return None - - source_variable, tokens = self.get_item_selector_tokens(variable_statement) - if source_variable is None or tokens is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "Schema not found in table expression", - ) - return None - - schema_name: str = tokens["Schema"] - - # Find step for the database access variable - variable_statement = _get_variable_statement(self.parse_tree, source_variable) - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-source-variable-statement", - f"schema variable 
({source_variable}) statement not found in table expression", - ) - return None - arg_list = self.get_argument_list(variable_statement) - if arg_list is None or len(arg_list) < 1: - self.reporter.report_warning( - f"{self.table.full_name}-database-arg-list", - "Expected number of argument not found in data-access function of table expression", - ) - return None - # The first argument has database name. format localhost:1521/salesdb.GSLAB.COM - db_name: Optional[str] = self._get_db_name(arg_list[0]) - if db_name is None: - LOGGER.debug(f"Fail to extract db name from the target {arg_list}") - - return f"{db_name}.{schema_name}.{table_name}" - - -class SnowflakeMQueryResolver(BaseMQueryResolver): - def get_platform(self) -> str: - return SupportedDataPlatform.SNOWFLAKE.value - - def get_full_table_name(self, output_variable: str) -> Optional[str]: - # Find step for the output variable - variable_statement: Optional[Tree] = _get_variable_statement( - self.parse_tree, output_variable - ) - - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - f"output variable ({output_variable}) statement not found in table expression", - ) - return None - - schema_variable, tokens = self.get_item_selector_tokens(variable_statement) - if schema_variable is None or tokens is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "table name not found in table expression", - ) - return None - - table_name: str = tokens["Name"] - - # Find step for the schema variable - variable_statement = _get_variable_statement(self.parse_tree, schema_variable) - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-schema-variable-statement", - f"schema variable ({schema_variable}) statement not found in table expression", - ) - return None - - source_variable, tokens = self.get_item_selector_tokens(variable_statement) - if source_variable is None or tokens is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "schema name not found in table expression", - ) - return None - - schema_name: str = tokens["Name"] - - # Find step for the database access variable - variable_statement = _get_variable_statement(self.parse_tree, source_variable) - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-source-variable-statement", - f"schema variable ({source_variable}) statement not found in table expression", - ) - return None - _, tokens = self.get_item_selector_tokens(variable_statement) - if tokens is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "database name not found in table expression", - ) - return None - - db_name: str = tokens["Name"] - - return f"{db_name}.{schema_name}.{table_name}" - - -def _get_resolver(parse_tree: Tree) -> Optional[Type["BaseMQueryResolver"]]: - - _filter: Any = parse_tree.find_data("invoke_expression") - - letter_tree: Tree = next(_filter).children[0] - data_access_func: str = _make_function_name(letter_tree) - - LOGGER.debug( - "Looking for data-access(%s) resolver in data-access-function registry %s", - data_access_func, - DATA_ACCESS_RESOLVER, - ) - - if DATA_ACCESS_RESOLVER.get(data_access_func) is None: - LOGGER.info("Resolver not found for %s", data_access_func) - return None - - return DATA_ACCESS_RESOLVER[data_access_func] - - -# Register M-Query resolver for specific database platform -DATA_ACCESS_RESOLVER = { - 
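For the default two-step pattern shown in the DefaultTwoStepDataAccessSources docstring above, each resolver registered here ultimately reduces the expression to <database>.<schema>.<item>. The snippet below is only a rough standalone sketch of that mapping, using a regex on the example expression; the actual resolvers walk the Lark parse tree instead.

import re

# Default M-Query generated by PowerBI Desktop for a two-step data access
# (same shape as the DefaultTwoStepDataAccessSources docstring example).
m_query = (
    'let\n'
    '    Source = Sql.Database("localhost", "library"),\n'
    '    dbo_book_issue = Source{[Schema="dbo",Item="book_issue"]}[Data]\n'
    'in\n'
    '    dbo_book_issue'
)

# The database name is the second argument of the data-access function ...
db_name = re.search(r'Sql\.Database\("[^"]*",\s*"([^"]+)"\)', m_query).group(1)
# ... and the schema/table come from the item-selector key/value pairs.
schema_name, table_name = re.search(
    r'\[Schema="([^"]+)",\s*Item="([^"]+)"\]', m_query
).groups()

print(f"{db_name}.{schema_name}.{table_name}")  # library.dbo.book_issue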
f"{SupportedDataPlatform.POSTGRES_SQL.value}.Database": PostgresMQueryResolver, - f"{SupportedDataPlatform.ORACLE.value}.Database": OracleMQueryResolver, - f"{SupportedDataPlatform.SNOWFLAKE.value}.Databases": SnowflakeMQueryResolver, - f"{SupportedDataPlatform.MS_SQL.value}.Database": MSSqlMQueryResolver, -} # type :ignore - - -def _parse_expression(expression: str) -> Tree: - # Read lexical grammar as text - grammar: str = pkg_resource.read_text( - "datahub.ingestion.source.powerbi", "powerbi-lexical-grammar.rule" - ) - - # Create lark parser for the grammar text - lark_parser = Lark(grammar, start="let_expression", regex=True) - - parse_tree: Tree = lark_parser.parse(expression) - - LOGGER.debug("Parse Tree") - if ( - LOGGER.level == logging.DEBUG - ): # Guard condition to avoid heavy pretty() function call - LOGGER.debug(parse_tree.pretty()) - - return parse_tree - - -def _validate_parse_tree(supported_funcs: List[str], tree: Tree) -> Tuple[bool, str]: - """ - :param tree: tree to validate as per functions supported by m_parser module - :return: first argument is False if validation is failed and second argument would contain the error message. - in-case of valid tree the first argument is True and second argument would be None. - """ - _filter: List[Tree] = tree.find_data("invoke_expression") - - valid: bool = False - message: Optional[str] = None - - for node in _filter: - primary_expression_node: Optional[Tree] = _get_first_rule(node, "primary_expression") - if primary_expression_node is None: - continue - identifier_node: Optional[Tree] = _get_first_rule(primary_expression_node, "identifier") - if identifier_node is None: - continue - - function_name: str = _make_function_name(identifier_node) - # This function should be in our supported function list - if function_name not in supported_funcs: - return False, f"function {function_name} is not supported" - - -def get_upstream_tables( - table: PowerBiAPI.Table, reporter: PowerBiDashboardSourceReport -) -> List[DataPlatformTable]: - if table.expression is None: - reporter.report_warning(table.full_name, "Expression is none") - return [] - - try: - parse_tree: Tree = _parse_expression(table.expression) - _validate_parse_tree([], parse_tree) - exit() - except lark.exceptions.UnexpectedCharacters as e: - LOGGER.debug(f"Fail to parse expression {table.expression}", exc_info=e) - reporter.report_warning( - table.full_name, f"UnSupported expression = {table.expression}" - ) - return [] - - trees: List[Tree] = list(parse_tree.find_data("invoke_expression")) - if len(trees) > 1: - reporter.report_warning( - table.full_name, f"{table.full_name} has more than one invoke expression" - ) - return [] - - resolver: Optional[Type[BaseMQueryResolver]] = _get_resolver(parse_tree) - if resolver is None: - LOGGER.debug("Table full-name = %s", table.full_name) - LOGGER.debug("Expression = %s", table.expression) - reporter.report_warning( - table.full_name, - f"{table.full_name} M-Query resolver not found for the table expression", - ) - return [] - - return resolver( - table, parse_tree, reporter - ).resolve_to_data_platform_table_list() # type: ignore diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py index 341a9a215dfcb..2b442f1394037 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py @@ -60,19 +60,8 @@ def get_upstream_tables( 
) return [] - resolver_enum: Optional[resolver.SupportedDataPlatform] = resolver.get_resolver(parse_tree) - if resolver_enum is None: - LOGGER.debug("Table full-name = %s", table.full_name) - LOGGER.debug("Expression = %s", table.expression) - reporter.report_warning( - table.full_name, - f"{table.full_name} M-Query resolver not found for the table expression", - ) - return [] - - return resolver_enum.get_m_query_resolver()( + return resolver.BaseMQueryResolver( table=table, parse_tree=parse_tree, - data_platform_pair=resolver_enum.get_data_platform_pair(), reporter=reporter, ).resolve_to_data_platform_table_list() # type: ignore diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index bb20b191689b0..50a5e488d32e0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -28,27 +28,27 @@ class DataPlatformTable: data_platform_pair: DataPlatformPair -class AbstractMQueryResolver(ABC): - pass +class FullTableNameCreator(ABC): + @abstractmethod + def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: + pass -class AbstractDataAccessMQueryResolver(AbstractMQueryResolver, ABC): +class AbstractDataAccessMQueryResolver(ABC): table: PowerBiAPI.Table parse_tree: Tree reporter: PowerBiDashboardSourceReport - data_platform_pair: DataPlatformPair def __init__( self, table: PowerBiAPI.Table, parse_tree: Tree, - data_platform_pair: DataPlatformPair, reporter: PowerBiDashboardSourceReport, ): self.table = table self.parse_tree = parse_tree self.reporter = reporter - self.data_platform_pair = data_platform_pair + self.specific_resolver = {} @abstractmethod def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: @@ -56,25 +56,21 @@ def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: class BaseMQueryResolver(AbstractDataAccessMQueryResolver, ABC): + @staticmethod def get_item_selector_tokens( - self, variable_statement: Tree + expression_tree: Tree ) -> Tuple[Optional[str], Optional[Dict[str, str]]]: - expression_tree: Optional[Tree] = tree_function.first_expression_func(variable_statement) - if expression_tree is None: - LOGGER.debug("Expression tree not found") - LOGGER.debug(variable_statement.pretty()) - return None, None item_selector: Optional[Tree] = tree_function.first_item_selector_func(expression_tree) if item_selector is None: LOGGER.debug("Item Selector not found in tree") - LOGGER.debug(variable_statement.pretty()) + LOGGER.debug(expression_tree.pretty()) return None, None identifier_tree: Optional[Tree] = tree_function.first_identifier_func(expression_tree) if identifier_tree is None: LOGGER.debug("Identifier not found in tree") - LOGGER.debug(variable_statement.pretty()) + LOGGER.debug(item_selector.pretty()) return None, None # remove whitespaces and quotes from token @@ -90,7 +86,7 @@ def get_item_selector_tokens( # cast to satisfy lint return identifier[0], dict(zip(iterator, iterator)) - def get_argument_list(self, variable_statement: Tree) -> Optional[List[str]]: + def get_argument_list(self, variable_statement: Tree) -> Optional[Tree]: expression_tree: Optional[Tree] = tree_function.first_expression_func(variable_statement) if expression_tree is None: LOGGER.debug("First expression rule not found in input tree") @@ -101,15 +97,104 @@ def get_argument_list(self, variable_statement: Tree) -> 
Optional[List[str]]: LOGGER.debug("First argument-list rule not found in input tree") return None - # remove whitespaces and quotes from token - tokens: List[str] = tree_function.strip_char_from_list( - tree_function.remove_whitespaces_from_list(tree_function.token_values(argument_list)), '"' - ) - return tokens + return argument_list + + def make_token_dict(self, identifier: str) -> Dict[str, Any]: + token_dict: Dict[str, Any] = {} + + def fill_token_dict(identifier: str, supported_data_access_func: List[str], t_dict: Dict[str, Any]) -> None: + """ + 1) Find statement where identifier appear in the left-hand side i.e. identifier = expression + 2) Check expression is function invocation i.e. invoke_expression or item_selector + 3) if it is function invocation and this function is not the data-access function then take first argument + i.e. identifier and call the function recursively + 4) if it is item_selector then take identifier and key-value pair, + add identifier and key-value pair in current_selector and call the function recursively + 5) This recursion will continue till we reach to data-access function and during recursion we will fill + token_dict dictionary for all item_selector we find during traversal. + + :param identifier: variable to look for + :param supported_data_access_func: List of supported data-access functions + :param t_dict: dict where key is identifier and value is key-value pair which represent item selected from + identifier + :return: None + """ + v_statement: Optional[Tree] = tree_function.get_variable_statement( + self.parse_tree, identifier + ) + if v_statement is None: + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + f"output variable ({identifier}) statement not found in table expression", + ) + return None + + expression_tree: Optional[Tree] = tree_function.first_expression_func(v_statement) + if expression_tree is None: + LOGGER.debug("Expression tree not found") + LOGGER.debug(v_statement.pretty()) + return None + invoke_expression: Optional[Tree] = tree_function.first_invoke_expression_func(expression_tree) + if invoke_expression is not None: + letter_tree: Tree = invoke_expression.children[0] + data_access_func: str = tree_function.make_function_name(letter_tree) + if data_access_func in supported_data_access_func: + token_dict.update( + { + f"{data_access_func}": { + "arg_list": self.get_argument_list(expression_tree), + **t_dict, + } + } + ) + return + + first_arg_tree: Optional[Tree] = tree_function.first_arg_list_func(invoke_expression) + if first_arg_tree is None: + LOGGER.debug("Function invocation without argument in expression = %s", invoke_expression.pretty()) + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + f"Function invocation without argument", + ) + return None + type_expression: Optional[Tree] = tree_function.first_type_expression_func(first_arg_tree) + if type_expression is None: + LOGGER.debug("Type expression not found in expression = %s", first_arg_tree.pretty()) + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + f"Type expression not found", + ) + return None + + tokens: List[str] = tree_function.token_values(type_expression) + if len(tokens) != 1: + LOGGER.debug("type-expression has more than one identifier = %s", type_expression.pretty()) + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + f"Unsupported type expression", + ) + return None + new_identifier: str = tokens[0] + fill_token_dict(new_identifier, 
supported_data_access_func, t_dict) + else: + identifier, key_vs_value = self.get_item_selector_tokens( + tree_function.first_expression_func(expression_tree) + ) + current_selector: Dict[str, Any] = { + f"{identifier}": { + "item_selectors": [key_vs_value], + **t_dict, + } + } + fill_token_dict(identifier, supported_data_access_func, current_selector) + + fill_token_dict(identifier, SupportedResolver.get_function_names(), {}) + + return token_dict def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: data_platform_tables: List[DataPlatformTable] = [] - # Look for output variable + output_variable: Optional[str] = tree_function.get_output_variable(self.parse_tree) if output_variable is None: self.reporter.report_warning( @@ -118,28 +203,33 @@ def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: ) return data_platform_tables - full_table_name: Optional[str] = self.get_full_table_name(output_variable) - if full_table_name is None: - LOGGER.debug( - "Fail to form full_table_name for PowerBI DataSet table %s", - self.table.full_name, - ) - return data_platform_tables - - return [ - DataPlatformTable( - name=full_table_name.split(".")[-1], - full_name=full_table_name, - data_platform_pair=self.data_platform_pair - ), - ] - - @abstractmethod - def get_full_table_name(self, output_variable: str) -> Optional[str]: - pass - - -class DefaultTwoStepDataAccessSources(BaseMQueryResolver, ABC): + token_dict: Dict[str, Any] = self.make_token_dict(output_variable) + + # each key is data-access function + for data_access_func in token_dict.keys(): + supported_resolver = SupportedResolver.get_resolver(data_access_func) + if supported_resolver is None: + LOGGER.debug("Resolver not found for the data-access-function %s", data_access_func) + self.reporter.report_warning( + f"{self.table.full_name}-data-access-function", + f"Resolver not found for data-access-function = {data_access_func}" + ) + continue + + table_full_name_creator: FullTableNameCreator = supported_resolver.get_table_full_name_creator()() + for table_full_name in table_full_name_creator.get_full_table_names(token_dict): + data_platform_tables.append( + DataPlatformTable( + name=table_full_name.split(".")[-1], + full_name=table_full_name, + data_platform_pair=supported_resolver.get_data_platform_pair() + ) + ) + + return data_platform_tables + + +class DefaultTwoStepDataAccessSources(FullTableNameCreator): """ These are the DataSource for which PowerBI Desktop generates default M-Query of following pattern let @@ -149,7 +239,7 @@ class DefaultTwoStepDataAccessSources(BaseMQueryResolver, ABC): dbo_book_issue """ - def get_full_table_name(self, output_variable: str) -> Optional[str]: + def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: variable_statement: Optional[Tree] = tree_function.get_variable_statement( self.parse_tree, output_variable ) @@ -189,15 +279,15 @@ def get_full_table_name(self, output_variable: str) -> Optional[str]: return cast(Optional[str], f"{database_name}.{schema_name}.{table_name}") -class PostgresMQueryResolver(DefaultTwoStepDataAccessSources): +class PostgresFullTableNameCreator(DefaultTwoStepDataAccessSources): pass -class MSSqlMQueryResolver(DefaultTwoStepDataAccessSources): +class MSSqlFullTableNameCreator(DefaultTwoStepDataAccessSources): pass -class OracleMQueryResolver(BaseMQueryResolver): +class OracleFullTableNameCreator(FullTableNameCreator): def _get_db_name(self, value: str) -> Optional[str]: error_message: str = f"The target argument ({value}) 
should in the format of :/[.]" @@ -212,7 +302,7 @@ def _get_db_name(self, value: str) -> Optional[str]: return db_name - def get_full_table_name(self, output_variable: str) -> Optional[str]: + def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: # Find step for the output variable variable_statement: Optional[Tree] = tree_function.get_variable_statement( self.parse_tree, output_variable @@ -281,9 +371,9 @@ def get_full_table_name(self, output_variable: str) -> Optional[str]: return f"{db_name}.{schema_name}.{table_name}" -class SnowflakeMQueryResolver(BaseMQueryResolver): +class SnowflakeFullTableNameCreator(FullTableNameCreator): - def get_full_table_name(self, output_variable: str) -> Optional[str]: + def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: # Find step for the output variable variable_statement: Optional[Tree] = tree_function.get_variable_statement( self.parse_tree, output_variable @@ -346,6 +436,12 @@ def get_full_table_name(self, output_variable: str) -> Optional[str]: return f"{db_name}.{schema_name}.{table_name}" +class NativeQueryFullTableNameCreator(FullTableNameCreator): + + def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: + pass + + class FunctionName(Enum): NATIVE_QUERY = "Value.NativeQuery" POSTGRESQL_DATA_ACCESS = "PostgreSQL.Database" @@ -354,66 +450,72 @@ class FunctionName(Enum): MSSQL_DATA_ACCESS = "Sql.Database" -class SupportedDataPlatform(Enum): +class SupportedResolver(Enum): POSTGRES_SQL = ( DataPlatformPair( powerbi_data_platform_name="PostgreSQL", datahub_data_platform_name="postgres" ), - PostgresMQueryResolver, + PostgresFullTableNameCreator, FunctionName.POSTGRESQL_DATA_ACCESS, ) + ORACLE = ( DataPlatformPair( powerbi_data_platform_name="Oracle", datahub_data_platform_name="oracle" ), - OracleMQueryResolver, + OracleFullTableNameCreator, FunctionName.ORACLE_DATA_ACCESS, ) + SNOWFLAKE = ( DataPlatformPair( powerbi_data_platform_name="Snowflake", datahub_data_platform_name="snowflake" ), - SnowflakeMQueryResolver, + SnowflakeFullTableNameCreator, FunctionName.SNOWFLAKE_DATA_ACCESS, ) + MS_SQL = ( DataPlatformPair( powerbi_data_platform_name="Sql", datahub_data_platform_name="mssql" ), - MSSqlMQueryResolver, + MSSqlFullTableNameCreator, FunctionName.MSSQL_DATA_ACCESS, ) + NATIVE_QUERY = ( + None, + NativeQueryFullTableNameCreator, + FunctionName.NATIVE_QUERY, + ) + def get_data_platform_pair(self) -> DataPlatformPair: return self.value[0] - def get_m_query_resolver(self) -> Type[BaseMQueryResolver]: + def get_table_full_name_creator(self) -> Type[FullTableNameCreator]: return self.value[1] - def get_function_name(self) -> FunctionName: - return self.value[2] - - -def get_resolver(parse_tree: Tree) -> Optional[SupportedDataPlatform]: + def get_function_name(self) -> str: + return self.value[2].value - _filter: Any = parse_tree.find_data("invoke_expression") - - letter_tree: Tree = next(_filter).children[0] - data_access_func: str = tree_function.make_function_name(letter_tree) - - LOGGER.debug( - "Looking for data-access(%s) resolver", - data_access_func, - ) + @staticmethod + def get_function_names() -> List[str]: + functions: List[str] = [] + for supported_resolver in SupportedResolver: + functions.append( + supported_resolver.get_function_name() + ) - for platform in SupportedDataPlatform: - if platform.get_function_name().value == data_access_func: - return platform + return functions - LOGGER.info("M-Query resolver not found for data access function %s", data_access_func) + 
@staticmethod + def get_resolver(function_name: str) -> Optional["SupportedResolver"]: + for supported_resolver in SupportedResolver: + if function_name == supported_resolver.get_function_name(): + return supported_resolver - return None + return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py index 91c9550903bd8..66922e9e11e73 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py @@ -32,7 +32,7 @@ def get_variable_statement(parse_tree: Tree, variable: str) -> Optional[Tree]: LOGGER.debug("Actual Value = %s", actual_value) LOGGER.debug("Expected Value = %s", variable) - if actual_value == variable: + if actual_value.lower() == variable.lower(): return tree LOGGER.info("Provided variable(%s) not found in variable rule", variable) @@ -141,3 +141,7 @@ def get_all_function_name(tree: Tree) -> List[str]: first_identifier_func = partial(get_first_rule, rule="identifier") first_primary_expression_func = partial(get_first_rule, rule="primary_expression") first_identifier_func = partial(get_first_rule, rule="identifier") +first_invoke_expression_func = partial(get_first_rule, rule="invoke_expression") +first_type_expression_func = partial(get_first_rule, rule="type_expression") + + diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 7a51006e1e46f..2709a7db0e304 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -33,177 +33,192 @@ ] -def test_parse_m_query1(): - expression: str = M_QUERIES[0] - parse_tree: Tree = parser._parse_expression(expression) - assert tree_function.get_output_variable(parse_tree) == "TESTTABLE_Table" - - -def test_parse_m_query2(): - expression: str = M_QUERIES[1] - parse_tree: Tree = parser._parse_expression(expression) - assert tree_function.get_output_variable(parse_tree) == '"Added Custom2"' - - -def test_parse_m_query3(): - expression: str = M_QUERIES[2] - parse_tree: Tree = parser._parse_expression(expression) - assert tree_function.get_output_variable(parse_tree) == '"Added Conditional Column"' - - -def test_parse_m_query4(): - expression: str = M_QUERIES[3] - parse_tree: Tree = parser._parse_expression(expression) - assert tree_function.get_output_variable(parse_tree) == '"Changed Type"' - - -def test_parse_m_query5(): - expression: str = M_QUERIES[4] - parse_tree: Tree = parser._parse_expression(expression) - assert tree_function.get_output_variable(parse_tree) == '"Renamed Columns"' - - -def test_parse_m_query6(): - expression: str = M_QUERIES[5] - parse_tree: Tree = parser._parse_expression(expression) - assert tree_function.get_output_variable(parse_tree) == '"Added Custom"' - - -def test_parse_m_query7(): - expression: str = M_QUERIES[6] - parse_tree: Tree = parser._parse_expression(expression) - assert tree_function.get_output_variable(parse_tree) == "Source" - - -def test_parse_m_query8(): - expression: str = M_QUERIES[7] - parse_tree: Tree = parser._parse_expression(expression) - assert tree_function.get_output_variable(parse_tree) == '"Added Custom1"' - - -def test_parse_m_query9(): - expression: str = M_QUERIES[8] - parse_tree: Tree = parser._parse_expression(expression) - assert 
tree_function.get_output_variable(parse_tree) == '"Added Custom1"' - - -def test_parse_m_query10(): - expression: str = M_QUERIES[9] - parse_tree: Tree = parser._parse_expression(expression) - assert tree_function.get_output_variable(parse_tree) == '"Changed Type1"' - - -def test_parse_m_query11(): - expression: str = M_QUERIES[10] - parse_tree: Tree = parser._parse_expression(expression) - assert tree_function.get_output_variable(parse_tree) == "Source" - - -def test_parse_m_query12(): - expression: str = M_QUERIES[11] - parse_tree: Tree = parser._parse_expression(expression) - assert tree_function.get_output_variable(parse_tree) == '"Added Custom"' - - -def test_parse_m_query13(): - expression: str = M_QUERIES[12] - parse_tree: Tree = parser._parse_expression(expression) - assert tree_function.get_output_variable(parse_tree) == "two_source_table" - - -def test_snowflake_regular_case(): - q: str = M_QUERIES[0] - table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=q, - name="virtual_order_table", - full_name="OrderDataSet.virtual_order_table", - ) - - reporter = PowerBiDashboardSourceReport() - - data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( - table, reporter - ) - - assert len(data_platform_tables) == 1 - assert data_platform_tables[0].name == "TESTTABLE" - assert data_platform_tables[0].full_name == "PBI_TEST.TEST.TESTTABLE" - assert ( - data_platform_tables[0].data_platform_pair.powerbi_data_platform_name - == SupportedDataPlatform.SNOWFLAKE.get_data_platform_pair().powerbi_data_platform_name - ) - - -def test_postgres_regular_case(): - q: str = M_QUERIES[13] - table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=q, - name="virtual_order_table", - full_name="OrderDataSet.virtual_order_table", - ) - - reporter = PowerBiDashboardSourceReport() - data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( - table, reporter - ) - - assert len(data_platform_tables) == 1 - assert data_platform_tables[0].name == "order_date" - assert data_platform_tables[0].full_name == "mics.public.order_date" - assert ( - data_platform_tables[0].data_platform_pair.powerbi_data_platform_name - == SupportedDataPlatform.POSTGRES_SQL.get_data_platform_pair().powerbi_data_platform_name - ) - - -def test_oracle_regular_case(): - q: str = M_QUERIES[14] - table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=q, - name="virtual_order_table", - full_name="OrderDataSet.virtual_order_table", - ) - - reporter = PowerBiDashboardSourceReport() - data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( - table, reporter - ) - - assert len(data_platform_tables) == 1 - assert data_platform_tables[0].name == "EMPLOYEES" - assert data_platform_tables[0].full_name == "salesdb.HR.EMPLOYEES" - assert ( - data_platform_tables[0].data_platform_pair.powerbi_data_platform_name - == SupportedDataPlatform.ORACLE.get_data_platform_pair().powerbi_data_platform_name - ) - - -def test_mssql_regular_case(): - q: str = M_QUERIES[15] - table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=q, - name="virtual_order_table", - full_name="OrderDataSet.virtual_order_table", - ) - - reporter = PowerBiDashboardSourceReport() - - data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( - table, reporter - ) - - assert len(data_platform_tables) == 1 - assert data_platform_tables[0].name == "book_issue" - assert data_platform_tables[0].full_name == "library.dbo.book_issue" - assert ( - data_platform_tables[0].data_platform_pair.powerbi_data_platform_name - 
== SupportedDataPlatform.MS_SQL.get_data_platform_pair().powerbi_data_platform_name - ) - +# def test_parse_m_query1(): +# expression: str = M_QUERIES[0] +# parse_tree: Tree = parser._parse_expression(expression) +# assert tree_function.get_output_variable(parse_tree) == "TESTTABLE_Table" +# +# +# def test_parse_m_query2(): +# expression: str = M_QUERIES[1] +# parse_tree: Tree = parser._parse_expression(expression) +# assert tree_function.get_output_variable(parse_tree) == '"Added Custom2"' +# +# +# def test_parse_m_query3(): +# expression: str = M_QUERIES[2] +# parse_tree: Tree = parser._parse_expression(expression) +# assert tree_function.get_output_variable(parse_tree) == '"Added Conditional Column"' +# +# +# def test_parse_m_query4(): +# expression: str = M_QUERIES[3] +# parse_tree: Tree = parser._parse_expression(expression) +# assert tree_function.get_output_variable(parse_tree) == '"Changed Type"' +# +# +# def test_parse_m_query5(): +# expression: str = M_QUERIES[4] +# parse_tree: Tree = parser._parse_expression(expression) +# assert tree_function.get_output_variable(parse_tree) == '"Renamed Columns"' +# +# +# def test_parse_m_query6(): +# expression: str = M_QUERIES[5] +# parse_tree: Tree = parser._parse_expression(expression) +# assert tree_function.get_output_variable(parse_tree) == '"Added Custom"' +# +# +# def test_parse_m_query7(): +# expression: str = M_QUERIES[6] +# parse_tree: Tree = parser._parse_expression(expression) +# assert tree_function.get_output_variable(parse_tree) == "Source" +# +# +# def test_parse_m_query8(): +# expression: str = M_QUERIES[7] +# parse_tree: Tree = parser._parse_expression(expression) +# assert tree_function.get_output_variable(parse_tree) == '"Added Custom1"' +# +# +# def test_parse_m_query9(): +# expression: str = M_QUERIES[8] +# parse_tree: Tree = parser._parse_expression(expression) +# assert tree_function.get_output_variable(parse_tree) == '"Added Custom1"' +# +# +# def test_parse_m_query10(): +# expression: str = M_QUERIES[9] +# parse_tree: Tree = parser._parse_expression(expression) +# assert tree_function.get_output_variable(parse_tree) == '"Changed Type1"' +# +# +# def test_parse_m_query11(): +# expression: str = M_QUERIES[10] +# parse_tree: Tree = parser._parse_expression(expression) +# assert tree_function.get_output_variable(parse_tree) == "Source" +# +# +# def test_parse_m_query12(): +# expression: str = M_QUERIES[11] +# parse_tree: Tree = parser._parse_expression(expression) +# assert tree_function.get_output_variable(parse_tree) == '"Added Custom"' +# +# +# def test_parse_m_query13(): +# expression: str = M_QUERIES[12] +# parse_tree: Tree = parser._parse_expression(expression) +# assert tree_function.get_output_variable(parse_tree) == "two_source_table" +# +# +# def test_snowflake_regular_case(): +# q: str = M_QUERIES[0] +# table: PowerBiAPI.Table = PowerBiAPI.Table( +# expression=q, +# name="virtual_order_table", +# full_name="OrderDataSet.virtual_order_table", +# ) +# +# reporter = PowerBiDashboardSourceReport() +# +# data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( +# table, reporter +# ) +# +# assert len(data_platform_tables) == 1 +# assert data_platform_tables[0].name == "TESTTABLE" +# assert data_platform_tables[0].full_name == "PBI_TEST.TEST.TESTTABLE" +# assert ( +# data_platform_tables[0].data_platform_pair.powerbi_data_platform_name +# == SupportedDataPlatform.SNOWFLAKE.get_data_platform_pair().powerbi_data_platform_name +# ) +# +# +# def test_postgres_regular_case(): +# q: str = M_QUERIES[13] 
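For the default two-step Sql.Database pattern, the nested token_dict built by make_token_dict (resolver.py hunk above) has roughly the shape below. The literal is illustrative only: the real "arg_list" value is a Lark Tree of the call arguments, not a list of strings.

token_dict = {
    "Sql.Database": {                          # data-access function that was reached
        "arg_list": ["localhost", "library"],  # actually a lark Tree, shown as strings here
        "Source": {                            # variable the data-access call is assigned to
            "item_selectors": [
                {
                    "items": {"Schema": "dbo", "Item": "book_issue"},
                    "assigned_to": "dbo_book_issue",  # output variable selecting the item
                },
            ],
        },
    },
}

two_level_access_pattern (further below) then reads the database name from arg_list[1] and the Schema/Item pair from each item selector to produce library.dbo.book_issue.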
+# table: PowerBiAPI.Table = PowerBiAPI.Table( +# expression=q, +# name="virtual_order_table", +# full_name="OrderDataSet.virtual_order_table", +# ) +# +# reporter = PowerBiDashboardSourceReport() +# data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( +# table, reporter +# ) +# +# assert len(data_platform_tables) == 1 +# assert data_platform_tables[0].name == "order_date" +# assert data_platform_tables[0].full_name == "mics.public.order_date" +# assert ( +# data_platform_tables[0].data_platform_pair.powerbi_data_platform_name +# == SupportedDataPlatform.POSTGRES_SQL.get_data_platform_pair().powerbi_data_platform_name +# ) +# +# +# def test_oracle_regular_case(): +# q: str = M_QUERIES[14] +# table: PowerBiAPI.Table = PowerBiAPI.Table( +# expression=q, +# name="virtual_order_table", +# full_name="OrderDataSet.virtual_order_table", +# ) +# +# reporter = PowerBiDashboardSourceReport() +# data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( +# table, reporter +# ) +# +# assert len(data_platform_tables) == 1 +# assert data_platform_tables[0].name == "EMPLOYEES" +# assert data_platform_tables[0].full_name == "salesdb.HR.EMPLOYEES" +# assert ( +# data_platform_tables[0].data_platform_pair.powerbi_data_platform_name +# == SupportedDataPlatform.ORACLE.get_data_platform_pair().powerbi_data_platform_name +# ) +# +# +# def test_mssql_regular_case(): +# q: str = M_QUERIES[15] +# table: PowerBiAPI.Table = PowerBiAPI.Table( +# expression=q, +# name="virtual_order_table", +# full_name="OrderDataSet.virtual_order_table", +# ) +# +# reporter = PowerBiDashboardSourceReport() +# +# data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( +# table, reporter +# ) +# +# assert len(data_platform_tables) == 1 +# assert data_platform_tables[0].name == "book_issue" +# assert data_platform_tables[0].full_name == "library.dbo.book_issue" +# assert ( +# data_platform_tables[0].data_platform_pair.powerbi_data_platform_name +# == SupportedDataPlatform.MS_SQL.get_data_platform_pair().powerbi_data_platform_name +# ) +# +# +# def test_native_query_disabled(): +# table: PowerBiAPI.Table = PowerBiAPI.Table( +# expression=M_QUERIES[1], +# name="virtual_order_table", +# full_name="OrderDataSet.virtual_order_table", +# ) +# +# reporter = PowerBiDashboardSourceReport() +# +# data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( +# table, reporter, native_query_enabled=False +# ) +# +# assert len(data_platform_tables) == 0 def test_native_query_disabled(): table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=M_QUERIES[1], + expression=M_QUERIES[9], name="virtual_order_table", full_name="OrderDataSet.virtual_order_table", ) @@ -211,7 +226,7 @@ def test_native_query_disabled(): reporter = PowerBiDashboardSourceReport() data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( - table, reporter, native_query_enabled=False + table, reporter ) assert len(data_platform_tables) == 0 From eb3eda5d3fb27d0b199c50fcc4e8c8708d89e6dd Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 19 Dec 2022 13:52:32 +0530 Subject: [PATCH 26/53] native query in MS-SQL --- .../powerbi/m_query/native_sql_parser.py | 29 ++ .../source/powerbi/m_query/resolver.py | 368 ++++++++---------- .../source/powerbi/m_query/validator.py | 7 +- .../integration/powerbi/test_m_parser.py | 233 ++++++----- 4 files changed, 340 insertions(+), 297 deletions(-) create mode 100644 
metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py new file mode 100644 index 0000000000000..f0ee706c1865b --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py @@ -0,0 +1,29 @@ +import sqlparse +from typing import List + + +def get_tables(native_query: str) -> List[str]: + # As per current use-case, we are extracting only single table from "from" + tables: List[str] = [] + parsed = sqlparse.parse(native_query)[0] + + tokens: List[sqlparse.sql.Token] = list(parsed.tokens) + length: int = len(tokens) + from_index: int = -1 + for index, token in enumerate(tokens): + if token.value.lower().strip() == "from" and str(token.ttype) == "Token.Keyword": + from_index = index+1 + break + + table_name = None + + while from_index < length: + if isinstance(tokens[from_index], sqlparse.sql.Identifier): + table_name = tokens[from_index].value + break + from_index = from_index + 1 + + if table_name is not None: + tables.append(table_name) + + return tables diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index 50a5e488d32e0..bf78b357073f6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -10,7 +10,7 @@ from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport from datahub.ingestion.source.powerbi.proxy import PowerBiAPI -from datahub.ingestion.source.powerbi.m_query import tree_function +from datahub.ingestion.source.powerbi.m_query import tree_function, native_sql_parser LOGGER = logging.getLogger(__name__) @@ -28,11 +28,37 @@ class DataPlatformTable: data_platform_pair: DataPlatformPair -class FullTableNameCreator(ABC): +class SupportedDataPlatform(Enum): + POSTGRES_SQL = DataPlatformPair( + powerbi_data_platform_name="PostgreSQL", + datahub_data_platform_name="postgres" + ) + + ORACLE = DataPlatformPair( + powerbi_data_platform_name="Oracle", + datahub_data_platform_name="oracle" + ) + + SNOWFLAKE = DataPlatformPair( + powerbi_data_platform_name="Snowflake", + datahub_data_platform_name="snowflake" + ) + + MS_SQL = DataPlatformPair( + powerbi_data_platform_name="Sql", + datahub_data_platform_name="mssql" + ) + + +class AbstractTableFullNameCreator(ABC): @abstractmethod def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: pass + @abstractmethod + def get_platform_pair(self) -> DataPlatformPair: + pass + class AbstractDataAccessMQueryResolver(ABC): table: PowerBiAPI.Table @@ -177,16 +203,21 @@ def fill_token_dict(identifier: str, supported_data_access_func: List[str], t_di new_identifier: str = tokens[0] fill_token_dict(new_identifier, supported_data_access_func, t_dict) else: - identifier, key_vs_value = self.get_item_selector_tokens( + new_identifier, key_vs_value = self.get_item_selector_tokens( tree_function.first_expression_func(expression_tree) ) current_selector: Dict[str, Any] = { - f"{identifier}": { - "item_selectors": [key_vs_value], + f"{new_identifier}": { + "item_selectors": [ + { + "items": key_vs_value, + "assigned_to": identifier + } + ], **t_dict, } } - fill_token_dict(identifier, supported_data_access_func, current_selector) + 
fill_token_dict(new_identifier, supported_data_access_func, current_selector) fill_token_dict(identifier, SupportedResolver.get_function_names(), {}) @@ -216,20 +247,20 @@ def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: ) continue - table_full_name_creator: FullTableNameCreator = supported_resolver.get_table_full_name_creator()() + table_full_name_creator: AbstractTableFullNameCreator = supported_resolver.get_table_full_name_creator()() for table_full_name in table_full_name_creator.get_full_table_names(token_dict): data_platform_tables.append( DataPlatformTable( name=table_full_name.split(".")[-1], full_name=table_full_name, - data_platform_pair=supported_resolver.get_data_platform_pair() + data_platform_pair=table_full_name_creator.get_platform_pair() ) ) return data_platform_tables -class DefaultTwoStepDataAccessSources(FullTableNameCreator): +class DefaultTwoStepDataAccessSources(AbstractTableFullNameCreator, ABC): """ These are the DataSource for which PowerBI Desktop generates default M-Query of following pattern let @@ -239,55 +270,92 @@ class DefaultTwoStepDataAccessSources(FullTableNameCreator): dbo_book_issue """ - def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: - variable_statement: Optional[Tree] = tree_function.get_variable_statement( - self.parse_tree, output_variable - ) - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - f"output variable ({output_variable}) statement not found in table expression", - ) - return None - source, tokens = self.get_item_selector_tokens(cast(Tree, variable_statement)) - if source is None or tokens is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "Schema detail not found in table expression", - ) - return None + def two_level_access_pattern(self, token_dict: Dict[str, Any]) -> List[str]: + full_table_names: List[str] = [] - schema_name: str = tokens["Schema"] - table_name: str = tokens["Item"] - # Look for database-name - variable_statement = tree_function.get_variable_statement(self.parse_tree, source) - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-source-statement", - f"source variable {source} statement not found in table expression", - ) - return None - arg_list = self.get_argument_list(cast(Tree, variable_statement)) - if arg_list is None or len(arg_list) < 1: - self.reporter.report_warning( - f"{self.table.full_name}-database-arg-list", - "Expected number of argument not found in data-access function of table expression", + LOGGER.debug("Processing PostgreSQL token-dict %s", token_dict) + + for data_access_function in token_dict: + arguments: List[str] = tree_function.strip_char_from_list( + values=tree_function.remove_whitespaces_from_list( + tree_function.token_values(token_dict[data_access_function]["arg_list"]) + ), + char="\"" ) - return None + # delete arg_list as we consumed it and don't want to process it in next step + if len(arguments) != 2: + LOGGER.debug("Expected 2 arguments, but got {%s}", len(arguments)) + return full_table_names + + del token_dict[data_access_function]["arg_list"] + + db_name: str = arguments[1] + for source in token_dict[data_access_function]: + source_dict: Dict[str, Any] = token_dict[data_access_function][source] + for schema in source_dict["item_selectors"]: + schema_name: str = schema["items"]["Schema"] + table_name: str = schema["items"]["Item"] + full_table_names.append( + 
f"{db_name}.{schema_name}.{table_name}" + ) - database_name: str = cast(List[str], arg_list)[1] # 1st token is database name - return cast(Optional[str], f"{database_name}.{schema_name}.{table_name}") + LOGGER.debug("PostgreSQL full-table-names = %s", full_table_names) + return full_table_names -class PostgresFullTableNameCreator(DefaultTwoStepDataAccessSources): - pass +class PostgresTableFullNameCreator(DefaultTwoStepDataAccessSources): + def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: + return self.two_level_access_pattern(token_dict) -class MSSqlFullTableNameCreator(DefaultTwoStepDataAccessSources): - pass + def get_platform_pair(self) -> DataPlatformPair: + return SupportedDataPlatform.POSTGRES_SQL.value -class OracleFullTableNameCreator(FullTableNameCreator): +class MSSqlTableFullNameCreator(DefaultTwoStepDataAccessSources): + def get_platform_pair(self) -> DataPlatformPair: + return SupportedDataPlatform.MS_SQL.value + + def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: + full_table_names: List[str] = [] + data_access_dict: Dict[str, Any] = list(token_dict.values())[0] + + arguments: List[str] = tree_function.strip_char_from_list( + values=tree_function.remove_whitespaces_from_list( + tree_function.token_values(data_access_dict["arg_list"]) + ), + char="\"" + ) + + if len(arguments) == 2: + # It is regular case of MS-SQL + LOGGER.debug("Handling with regular case") + return self.two_level_access_pattern(token_dict) + + if len(arguments) >= 4 and arguments[2] != "Query": + LOGGER.debug("Unsupported case is found. Second index is not the Query") + return full_table_names + + db_name: str = arguments[1] + tables: List[str] = native_sql_parser.get_tables(arguments[3]) + for table in tables: + schema_and_table: List[str] = table.split(".") + if len(schema_and_table) == 1: + # schema name is not present. 
Default schema name in MS-SQL is dbo + # https://learn.microsoft.com/en-us/sql/relational-databases/security/authentication-access/ownership-and-user-schema-separation?view=sql-server-ver16 + schema_and_table.insert(0, "dbo") + + full_table_names.append( + f"{db_name}.{schema_and_table[0]}.{schema_and_table[1]}" + ) + LOGGER.debug("MS-SQL full-table-names %s", full_table_names) + + return full_table_names + + +class OracleTableFullNameCreator(AbstractTableFullNameCreator): + def get_platform_pair(self) -> DataPlatformPair: + return SupportedDataPlatform.ORACLE.value def _get_db_name(self, value: str) -> Optional[str]: error_message: str = f"The target argument ({value}) should in the format of :/[.]" @@ -303,143 +371,72 @@ def _get_db_name(self, value: str) -> Optional[str]: return db_name def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: - # Find step for the output variable - variable_statement: Optional[Tree] = tree_function.get_variable_statement( - self.parse_tree, output_variable - ) - - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - f"output variable ({output_variable}) statement not found in table expression", - ) - return None - - schema_variable, tokens = self.get_item_selector_tokens( - cast(Tree, variable_statement) - ) - if schema_variable is None or tokens is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "table name not found in table expression", - ) - return None + full_table_names: List[str] = [] - table_name: str = tokens["Name"] + LOGGER.debug("Processing Oracle token-dict %s", token_dict) - # Find step for the schema variable - variable_statement = tree_function.get_variable_statement( - self.parse_tree, cast(str, schema_variable) - ) - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-schema-variable-statement", - f"schema variable ({schema_variable}) statement not found in table expression", - ) - return None + for data_access_function in token_dict: + arguments: List[str] = tree_function.remove_whitespaces_from_list( + tree_function.token_values(token_dict[data_access_function]["arg_list"])) + # delete arg_list as we consumed it and don't want to process it in next step + del token_dict[data_access_function]["arg_list"] - source_variable, tokens = self.get_item_selector_tokens(variable_statement) - if source_variable is None or tokens is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "Schema not found in table expression", - ) - return None + for source in token_dict[data_access_function]: + source_dict: Dict[str, Any] = token_dict[data_access_function][source] - schema_name: str = tokens["Schema"] + db_name: Optional[str] = self._get_db_name(arguments[0]) + if db_name is None: + return full_table_names - # Find step for the database access variable - variable_statement = tree_function.get_variable_statement(self.parse_tree, source_variable) - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-source-variable-statement", - f"schema variable ({source_variable}) statement not found in table expression", - ) - return None - arg_list = self.get_argument_list(variable_statement) - if arg_list is None or len(arg_list) < 1: - self.reporter.report_warning( - f"{self.table.full_name}-database-arg-list", - "Expected number of argument not found in data-access function of table expression", - ) - return None - # The first argument 
has database name. format localhost:1521/salesdb.GSLAB.COM - db_name: Optional[str] = self._get_db_name(arg_list[0]) - if db_name is None: - LOGGER.debug(f"Fail to extract db name from the target {arg_list}") + for schema in source_dict["item_selectors"]: + schema_name: str = schema["items"]["Schema"] + for item_selectors in source_dict[schema["assigned_to"]]: + for item_selector in source_dict[schema["assigned_to"]][item_selectors]: + table_name: str = item_selector["items"]["Name"] + full_table_names.append( + f"{db_name}.{schema_name}.{table_name}" + ) - return f"{db_name}.{schema_name}.{table_name}" + return full_table_names -class SnowflakeFullTableNameCreator(FullTableNameCreator): +class SnowflakeTableFullNameCreator(AbstractTableFullNameCreator): + def get_platform_pair(self) -> DataPlatformPair: + return SupportedDataPlatform.SNOWFLAKE.value def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: - # Find step for the output variable - variable_statement: Optional[Tree] = tree_function.get_variable_statement( - self.parse_tree, output_variable - ) + full_table_names: List[str] = [] - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - f"output variable ({output_variable}) statement not found in table expression", - ) - return None + LOGGER.debug("Processing Snowflake token-dict %s", token_dict) - schema_variable, tokens = self.get_item_selector_tokens(variable_statement) - if schema_variable is None or tokens is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "table name not found in table expression", - ) - return None + data_access_dict: Dict[str, Any] = list(token_dict.values())[0] + del data_access_dict["arg_list"] - table_name: str = tokens["Name"] + for source in data_access_dict: + for db_its in data_access_dict[source]["item_selectors"]: + db_name: str = db_its["items"]["Name"] + for schema_its in data_access_dict[source][db_its["assigned_to"]]["item_selectors"]: + schema_name: str = schema_its["items"]["Name"] + for table_its in data_access_dict[source][db_its["assigned_to"]][schema_its["assigned_to"]]["item_selectors"]: + table_name: str = table_its["items"]["Name"] + full_table_names.append( + f"{db_name}.{schema_name}.{table_name}" + ) - # Find step for the schema variable - variable_statement = tree_function.get_variable_statement(self.parse_tree, schema_variable) - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-schema-variable-statement", - f"schema variable ({schema_variable}) statement not found in table expression", - ) - return None - - source_variable, tokens = self.get_item_selector_tokens(variable_statement) - if source_variable is None or tokens is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "schema name not found in table expression", - ) - return None - - schema_name: str = tokens["Name"] - - # Find step for the database access variable - variable_statement = tree_function.get_variable_statement(self.parse_tree, source_variable) - if variable_statement is None: - self.reporter.report_warning( - f"{self.table.full_name}-source-variable-statement", - f"schema variable ({source_variable}) statement not found in table expression", - ) - return None - _, tokens = self.get_item_selector_tokens(variable_statement) - if tokens is None: - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "database name not found in table expression", - 
) - return None + LOGGER.debug("Snowflake full-table-name %s", full_table_names) - db_name: str = tokens["Name"] + return full_table_names - return f"{db_name}.{schema_name}.{table_name}" - -class NativeQueryFullTableNameCreator(FullTableNameCreator): +class NativeQueryTableFullNameCreator(AbstractTableFullNameCreator): + def get_platform_pair(self) -> DataPlatformPair: + return SupportedDataPlatform.POSTGRES_SQL.value def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: - pass + print("===NATIVE========") + for source in token_dict: + print(tree_function.token_values(token_dict[source]["arg_list"])) + return [] class FunctionName(Enum): @@ -452,55 +449,35 @@ class FunctionName(Enum): class SupportedResolver(Enum): POSTGRES_SQL = ( - DataPlatformPair( - powerbi_data_platform_name="PostgreSQL", - datahub_data_platform_name="postgres" - ), - PostgresFullTableNameCreator, + PostgresTableFullNameCreator, FunctionName.POSTGRESQL_DATA_ACCESS, ) ORACLE = ( - DataPlatformPair( - powerbi_data_platform_name="Oracle", - datahub_data_platform_name="oracle" - ), - OracleFullTableNameCreator, + OracleTableFullNameCreator, FunctionName.ORACLE_DATA_ACCESS, ) SNOWFLAKE = ( - DataPlatformPair( - powerbi_data_platform_name="Snowflake", - datahub_data_platform_name="snowflake" - ), - SnowflakeFullTableNameCreator, + SnowflakeTableFullNameCreator, FunctionName.SNOWFLAKE_DATA_ACCESS, ) MS_SQL = ( - DataPlatformPair( - powerbi_data_platform_name="Sql", - datahub_data_platform_name="mssql" - ), - MSSqlFullTableNameCreator, + MSSqlTableFullNameCreator, FunctionName.MSSQL_DATA_ACCESS, ) NATIVE_QUERY = ( - None, - NativeQueryFullTableNameCreator, + NativeQueryTableFullNameCreator, FunctionName.NATIVE_QUERY, ) - def get_data_platform_pair(self) -> DataPlatformPair: + def get_table_full_name_creator(self) -> Type[AbstractTableFullNameCreator]: return self.value[0] - def get_table_full_name_creator(self) -> Type[FullTableNameCreator]: - return self.value[1] - def get_function_name(self) -> str: - return self.value[2].value + return self.value[1].value @staticmethod def get_function_names() -> List[str]: @@ -514,8 +491,9 @@ def get_function_names() -> List[str]: @staticmethod def get_resolver(function_name: str) -> Optional["SupportedResolver"]: + LOGGER.debug("Looking for resolver %s", function_name) for supported_resolver in SupportedResolver: if function_name == supported_resolver.get_function_name(): return supported_resolver - + LOGGER.debug("Looking not found for resolver %s", function_name) return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py index 3941e4ed38ed5..02edab6dac758 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py @@ -40,12 +40,7 @@ def validate_parse_tree(tree: Tree, native_query_enabled: bool = True) -> Tuple[ """ functions: List[str] = tree_function.get_all_function_name(tree) if len(functions) == 0: - return False, "Function call not found" - - data_access_function_names: List[str] = [x.get_function_name().value for x in resolver.SupportedDataPlatform] - result: Set[str] = set(data_access_function_names) & set(functions) - if len(result) != 1: - return False, f"More than one data-access functions are found in expression. 
Functions = {result}" + return False, "Function calls not found" if native_query_enabled is False: if resolver.FunctionName.NATIVE_QUERY.value in functions: diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 2709a7db0e304..d65bfd84774e5 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -9,6 +9,7 @@ from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport from datahub.ingestion.source.powerbi.m_query.resolver import ( DataPlatformTable, + SupportedResolver, SupportedDataPlatform, ) from datahub.ingestion.source.powerbi.proxy import PowerBiAPI @@ -111,77 +112,138 @@ # assert tree_function.get_output_variable(parse_tree) == "two_source_table" # # -# def test_snowflake_regular_case(): -# q: str = M_QUERIES[0] -# table: PowerBiAPI.Table = PowerBiAPI.Table( -# expression=q, -# name="virtual_order_table", -# full_name="OrderDataSet.virtual_order_table", -# ) -# -# reporter = PowerBiDashboardSourceReport() -# -# data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( -# table, reporter -# ) -# -# assert len(data_platform_tables) == 1 -# assert data_platform_tables[0].name == "TESTTABLE" -# assert data_platform_tables[0].full_name == "PBI_TEST.TEST.TESTTABLE" -# assert ( -# data_platform_tables[0].data_platform_pair.powerbi_data_platform_name -# == SupportedDataPlatform.SNOWFLAKE.get_data_platform_pair().powerbi_data_platform_name -# ) -# -# -# def test_postgres_regular_case(): -# q: str = M_QUERIES[13] -# table: PowerBiAPI.Table = PowerBiAPI.Table( -# expression=q, -# name="virtual_order_table", -# full_name="OrderDataSet.virtual_order_table", -# ) -# -# reporter = PowerBiDashboardSourceReport() -# data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( -# table, reporter -# ) -# -# assert len(data_platform_tables) == 1 -# assert data_platform_tables[0].name == "order_date" -# assert data_platform_tables[0].full_name == "mics.public.order_date" -# assert ( -# data_platform_tables[0].data_platform_pair.powerbi_data_platform_name -# == SupportedDataPlatform.POSTGRES_SQL.get_data_platform_pair().powerbi_data_platform_name -# ) -# -# -# def test_oracle_regular_case(): -# q: str = M_QUERIES[14] -# table: PowerBiAPI.Table = PowerBiAPI.Table( -# expression=q, -# name="virtual_order_table", -# full_name="OrderDataSet.virtual_order_table", -# ) -# -# reporter = PowerBiDashboardSourceReport() -# data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( -# table, reporter -# ) -# -# assert len(data_platform_tables) == 1 -# assert data_platform_tables[0].name == "EMPLOYEES" -# assert data_platform_tables[0].full_name == "salesdb.HR.EMPLOYEES" -# assert ( -# data_platform_tables[0].data_platform_pair.powerbi_data_platform_name -# == SupportedDataPlatform.ORACLE.get_data_platform_pair().powerbi_data_platform_name -# ) -# +def test_snowflake_regular_case(): + q: str = M_QUERIES[0] + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=q, + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, reporter + ) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == "TESTTABLE" + assert data_platform_tables[0].full_name == "PBI_TEST.TEST.TESTTABLE" + assert ( + 
data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.SNOWFLAKE.value.powerbi_data_platform_name + ) + + +def test_postgres_regular_case(): + q: str = M_QUERIES[13] + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=q, + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, reporter + ) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == "order_date" + assert data_platform_tables[0].full_name == "mics.public.order_date" + assert ( + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.POSTGRES_SQL.value.powerbi_data_platform_name + ) + + +def test_oracle_regular_case(): + q: str = M_QUERIES[14] + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=q, + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, reporter + ) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == "EMPLOYEES" + assert data_platform_tables[0].full_name == "salesdb.HR.EMPLOYEES" + assert ( + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.ORACLE.value.powerbi_data_platform_name + ) + + +def test_mssql_regular_case(): + q: str = M_QUERIES[15] + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=q, + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, reporter + ) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == "book_issue" + assert data_platform_tables[0].full_name == "library.dbo.book_issue" + assert ( + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.MS_SQL.value.powerbi_data_platform_name + ) + + +def test_mssql_with_query(): + mssql_queries: List[str] = [ + M_QUERIES[3], + M_QUERIES[4], + M_QUERIES[5], + M_QUERIES[7], + M_QUERIES[8], + M_QUERIES[11], + ] + expected_tables = [ + "COMMOPSDB.dbo.V_OIP_ENT_2022", + "COMMOPSDB.dbo.V_INVOICE_BOOKING_2022", + "COMMOPSDB.dbo.V_ARR_ADDS", + "COMMOPSDB.dbo.V_PS_CD_RETENTION", + "COMMOPSDB.dbo.V_TPV_LEADERBOARD", + "COMMOPSDB.dbo.V_ENTERPRISE_INVOICED_REVENUE", + ] + + for index, query in enumerate(mssql_queries): + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=query, + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + reporter = PowerBiDashboardSourceReport() + + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, reporter, native_query_enabled=False + ) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == expected_tables[index].split(".")[2] + assert data_platform_tables[0].full_name == expected_tables[index] + assert ( + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.MS_SQL.value.powerbi_data_platform_name + ) + # -# def test_mssql_regular_case(): -# q: str = M_QUERIES[15] +# def test_native_query_disabled(): # table: PowerBiAPI.Table = PowerBiAPI.Table( -# expression=q, +# expression=M_QUERIES[1], # name="virtual_order_table", # 
full_name="OrderDataSet.virtual_order_table", # ) @@ -189,21 +251,15 @@ # reporter = PowerBiDashboardSourceReport() # # data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( -# table, reporter -# ) -# -# assert len(data_platform_tables) == 1 -# assert data_platform_tables[0].name == "book_issue" -# assert data_platform_tables[0].full_name == "library.dbo.book_issue" -# assert ( -# data_platform_tables[0].data_platform_pair.powerbi_data_platform_name -# == SupportedDataPlatform.MS_SQL.get_data_platform_pair().powerbi_data_platform_name +# table, reporter, native_query_enabled=False # ) # -# +# assert len(data_platform_tables) == 0 + # def test_native_query_disabled(): +# # for q in M_QUERIES: # table: PowerBiAPI.Table = PowerBiAPI.Table( -# expression=M_QUERIES[1], +# expression=M_QUERIES[13], # name="virtual_order_table", # full_name="OrderDataSet.virtual_order_table", # ) @@ -211,22 +267,7 @@ # reporter = PowerBiDashboardSourceReport() # # data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( -# table, reporter, native_query_enabled=False +# table, reporter # ) # # assert len(data_platform_tables) == 0 - -def test_native_query_disabled(): - table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=M_QUERIES[9], - name="virtual_order_table", - full_name="OrderDataSet.virtual_order_table", - ) - - reporter = PowerBiDashboardSourceReport() - - data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( - table, reporter - ) - - assert len(data_platform_tables) == 0 From 8c8fff40f9b7a81298f2b51948c680e1f6f01258 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 19 Dec 2022 17:34:15 +0530 Subject: [PATCH 27/53] Working native and regular cases --- .../powerbi/m_query/native_sql_parser.py | 18 ++ .../source/powerbi/m_query/resolver.py | 41 ++- .../source/powerbi/m_query/tree_function.py | 12 + .../integration/powerbi/test_m_parser.py | 239 ++++++++++-------- 4 files changed, 196 insertions(+), 114 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py index f0ee706c1865b..bc2881119167f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py @@ -1,8 +1,23 @@ +import logging + import sqlparse from typing import List +SPECIAL_CHARACTERS = ["#(lf)", "(lf)"] + +LOGGER = logging.getLogger() + + +def remove_special_characters(native_query: str) -> str: + for char in SPECIAL_CHARACTERS: + native_query = native_query.replace(char, " ") + + return native_query + def get_tables(native_query: str) -> List[str]: + native_query = remove_special_characters(native_query) + LOGGER.debug("Processing query = %s", native_query) # As per current use-case, we are extracting only single table from "from" tables: List[str] = [] parsed = sqlparse.parse(native_query)[0] @@ -11,6 +26,7 @@ def get_tables(native_query: str) -> List[str]: length: int = len(tokens) from_index: int = -1 for index, token in enumerate(tokens): + LOGGER.debug("%s=%s", token.value, token.ttype) if token.value.lower().strip() == "from" and str(token.ttype) == "Token.Keyword": from_index = index+1 break @@ -18,6 +34,8 @@ def get_tables(native_query: str) -> List[str]: table_name = None while from_index < length: + LOGGER.debug("%s=%s", tokens[from_index].value, tokens[from_index].ttype) + 
LOGGER.debug("Type=%s", type(tokens[from_index])) if isinstance(tokens[from_index], sqlparse.sql.Identifier): table_name = tokens[from_index].value break diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index bf78b357073f6..d787a67d8d225 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -430,13 +430,44 @@ def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: class NativeQueryTableFullNameCreator(AbstractTableFullNameCreator): def get_platform_pair(self) -> DataPlatformPair: - return SupportedDataPlatform.POSTGRES_SQL.value + return SupportedDataPlatform.SNOWFLAKE.value def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: - print("===NATIVE========") - for source in token_dict: - print(tree_function.token_values(token_dict[source]["arg_list"])) - return [] + full_table_names: List[str] = [] + data_access_dict: Dict[str, Any] = list(token_dict.values())[0] + t1: Tree = tree_function.first_arg_list_func(data_access_dict["arg_list"]) + flat_argument_list: List[Tree] = tree_function.flat_argument_list(t1) + + if len(flat_argument_list) != 2: + LOGGER.debug("Expecting 2 argument, actual argument count is %s", len(flat_argument_list)) + LOGGER.debug("Flat argument list = %s", flat_argument_list) + return full_table_names + + data_access_tokens: List[str] = tree_function.remove_whitespaces_from_list( + tree_function.token_values(flat_argument_list[0]) + ) + if data_access_tokens[0] != SupportedDataPlatform.SNOWFLAKE.value.powerbi_data_platform_name: + LOGGER.debug("Provided native-query data-platform = %s", data_access_tokens[0]) + LOGGER.debug("Only Snowflake is supported in NativeQuery") + return full_table_names + + # First argument is the query + sql_query: str = tree_function.strip_char_from_list( + values=tree_function.remove_whitespaces_from_list( + tree_function.token_values(flat_argument_list[1]) + ), + char="\"" + + )[0] # Remove any whitespaces and double quotes character + + for table in native_sql_parser.get_tables(sql_query): + if len(table.split(".")) != 3: + LOGGER.debug("Skipping table (%s) as it is not as per full_table_name format", table) + full_table_names.append( + table + ) + + return full_table_names class FunctionName(Enum): diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py index 66922e9e11e73..f13688c1bd84a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py @@ -135,6 +135,18 @@ def get_all_function_name(tree: Tree) -> List[str]: return functions +def flat_argument_list(tree: Tree) -> List[Tree]: + values: List[str] = [] + + for child in tree.children: + if isinstance(child, Token): + continue + if isinstance(child, Tree) and (child.data == "argument_list" or child.data == "expression"): + values.append(child) + + return values + + first_expression_func = partial(get_first_rule, rule="expression") first_item_selector_func = partial(get_first_rule, rule="item_selector") first_arg_list_func = partial(get_first_rule, rule="argument_list") diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py 
b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index d65bfd84774e5..8b42a924dab05 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -34,84 +34,84 @@ ] -# def test_parse_m_query1(): -# expression: str = M_QUERIES[0] -# parse_tree: Tree = parser._parse_expression(expression) -# assert tree_function.get_output_variable(parse_tree) == "TESTTABLE_Table" -# -# -# def test_parse_m_query2(): -# expression: str = M_QUERIES[1] -# parse_tree: Tree = parser._parse_expression(expression) -# assert tree_function.get_output_variable(parse_tree) == '"Added Custom2"' -# -# -# def test_parse_m_query3(): -# expression: str = M_QUERIES[2] -# parse_tree: Tree = parser._parse_expression(expression) -# assert tree_function.get_output_variable(parse_tree) == '"Added Conditional Column"' -# -# -# def test_parse_m_query4(): -# expression: str = M_QUERIES[3] -# parse_tree: Tree = parser._parse_expression(expression) -# assert tree_function.get_output_variable(parse_tree) == '"Changed Type"' -# -# -# def test_parse_m_query5(): -# expression: str = M_QUERIES[4] -# parse_tree: Tree = parser._parse_expression(expression) -# assert tree_function.get_output_variable(parse_tree) == '"Renamed Columns"' -# -# -# def test_parse_m_query6(): -# expression: str = M_QUERIES[5] -# parse_tree: Tree = parser._parse_expression(expression) -# assert tree_function.get_output_variable(parse_tree) == '"Added Custom"' -# -# -# def test_parse_m_query7(): -# expression: str = M_QUERIES[6] -# parse_tree: Tree = parser._parse_expression(expression) -# assert tree_function.get_output_variable(parse_tree) == "Source" -# -# -# def test_parse_m_query8(): -# expression: str = M_QUERIES[7] -# parse_tree: Tree = parser._parse_expression(expression) -# assert tree_function.get_output_variable(parse_tree) == '"Added Custom1"' -# -# -# def test_parse_m_query9(): -# expression: str = M_QUERIES[8] -# parse_tree: Tree = parser._parse_expression(expression) -# assert tree_function.get_output_variable(parse_tree) == '"Added Custom1"' -# -# -# def test_parse_m_query10(): -# expression: str = M_QUERIES[9] -# parse_tree: Tree = parser._parse_expression(expression) -# assert tree_function.get_output_variable(parse_tree) == '"Changed Type1"' -# -# -# def test_parse_m_query11(): -# expression: str = M_QUERIES[10] -# parse_tree: Tree = parser._parse_expression(expression) -# assert tree_function.get_output_variable(parse_tree) == "Source" -# -# -# def test_parse_m_query12(): -# expression: str = M_QUERIES[11] -# parse_tree: Tree = parser._parse_expression(expression) -# assert tree_function.get_output_variable(parse_tree) == '"Added Custom"' -# -# -# def test_parse_m_query13(): -# expression: str = M_QUERIES[12] -# parse_tree: Tree = parser._parse_expression(expression) -# assert tree_function.get_output_variable(parse_tree) == "two_source_table" -# -# +def test_parse_m_query1(): + expression: str = M_QUERIES[0] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == "TESTTABLE_Table" + + +def test_parse_m_query2(): + expression: str = M_QUERIES[1] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Added Custom2"' + + +def test_parse_m_query3(): + expression: str = M_QUERIES[2] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Added Conditional Column"' + + +def 
test_parse_m_query4(): + expression: str = M_QUERIES[3] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Changed Type"' + + +def test_parse_m_query5(): + expression: str = M_QUERIES[4] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Renamed Columns"' + + +def test_parse_m_query6(): + expression: str = M_QUERIES[5] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Added Custom"' + + +def test_parse_m_query7(): + expression: str = M_QUERIES[6] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == "Source" + + +def test_parse_m_query8(): + expression: str = M_QUERIES[7] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Added Custom1"' + + +def test_parse_m_query9(): + expression: str = M_QUERIES[8] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Added Custom1"' + + +def test_parse_m_query10(): + expression: str = M_QUERIES[9] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Changed Type1"' + + +def test_parse_m_query11(): + expression: str = M_QUERIES[10] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == "Source" + + +def test_parse_m_query12(): + expression: str = M_QUERIES[11] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == '"Added Custom"' + + +def test_parse_m_query13(): + expression: str = M_QUERIES[12] + parse_tree: Tree = parser._parse_expression(expression) + assert tree_function.get_output_variable(parse_tree) == "two_source_table" + + def test_snowflake_regular_case(): q: str = M_QUERIES[0] table: PowerBiAPI.Table = PowerBiAPI.Table( @@ -240,34 +240,55 @@ def test_mssql_with_query(): == SupportedDataPlatform.MS_SQL.value.powerbi_data_platform_name ) -# -# def test_native_query_disabled(): -# table: PowerBiAPI.Table = PowerBiAPI.Table( -# expression=M_QUERIES[1], -# name="virtual_order_table", -# full_name="OrderDataSet.virtual_order_table", -# ) -# -# reporter = PowerBiDashboardSourceReport() -# -# data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( -# table, reporter, native_query_enabled=False -# ) -# -# assert len(data_platform_tables) == 0 - -# def test_native_query_disabled(): -# # for q in M_QUERIES: -# table: PowerBiAPI.Table = PowerBiAPI.Table( -# expression=M_QUERIES[13], -# name="virtual_order_table", -# full_name="OrderDataSet.virtual_order_table", -# ) -# -# reporter = PowerBiDashboardSourceReport() -# -# data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( -# table, reporter -# ) -# -# assert len(data_platform_tables) == 0 + +def test_snowflake_native_query(): + snowflake_queries: List[str] = [ + M_QUERIES[1], + M_QUERIES[2], + M_QUERIES[6], + M_QUERIES[10], + ] + + expected_tables = [ + "OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", + "OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS", + "OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS", + "OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS", + ] + + for index, query in enumerate(snowflake_queries): + table: PowerBiAPI.Table = 
PowerBiAPI.Table( + expression=query, + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + reporter = PowerBiDashboardSourceReport() + + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, reporter + ) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == expected_tables[index].split(".")[2] + assert data_platform_tables[0].full_name == expected_tables[index] + assert ( + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.SNOWFLAKE.value.powerbi_data_platform_name + ) + + +def test_native_query_disabled(): + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=M_QUERIES[1], + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, reporter, native_query_enabled=False + ) + + assert len(data_platform_tables) == 0 + From 3719107b719ee4496e7eda16c7c49e11f1a15cc4 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 19 Dec 2022 18:26:43 +0530 Subject: [PATCH 28/53] lint fix --- .../powerbi/m_query/native_sql_parser.py | 11 +- .../source/powerbi/m_query/parser.py | 17 +- .../source/powerbi/m_query/resolver.py | 213 +++++++++++------- .../source/powerbi/m_query/tree_function.py | 11 +- .../source/powerbi/m_query/validator.py | 21 +- .../ingestion/source/powerbi/powerbi.py | 32 ++- .../integration/powerbi/test_m_parser.py | 19 +- 7 files changed, 188 insertions(+), 136 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py index bc2881119167f..e64c3b77cff93 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py @@ -1,7 +1,7 @@ import logging +from typing import List import sqlparse -from typing import List SPECIAL_CHARACTERS = ["#(lf)", "(lf)"] @@ -26,9 +26,12 @@ def get_tables(native_query: str) -> List[str]: length: int = len(tokens) from_index: int = -1 for index, token in enumerate(tokens): - LOGGER.debug("%s=%s", token.value, token.ttype) - if token.value.lower().strip() == "from" and str(token.ttype) == "Token.Keyword": - from_index = index+1 + LOGGER.debug("%s=%s", token.value, token.ttype) + if ( + token.value.lower().strip() == "from" + and str(token.ttype) == "Token.Keyword" + ): + from_index = index + 1 break table_name = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py index 2b442f1394037..1c4b674d5ef05 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py @@ -1,15 +1,13 @@ import importlib.resources as pkg_resource import logging -from typing import List, Optional +from typing import List, cast import lark from lark import Lark, Tree from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport - +from datahub.ingestion.source.powerbi.m_query import resolver, validator from datahub.ingestion.source.powerbi.proxy import PowerBiAPI -from datahub.ingestion.source.powerbi.m_query import validator -from datahub.ingestion.source.powerbi.m_query import resolver LOGGER 
= logging.getLogger(__name__) @@ -45,13 +43,12 @@ def get_upstream_tables( try: parse_tree: Tree = _parse_expression(table.expression) - valid, message = validator.validate_parse_tree(parse_tree, native_query_enabled=native_query_enabled) + valid, message = validator.validate_parse_tree( + parse_tree, native_query_enabled=native_query_enabled + ) if valid is False: - LOGGER.debug("Validation failed: %s", message) - reporter.report_warning( - table.full_name, - message - ) + LOGGER.debug("Validation failed: %s", cast(str, message)) + reporter.report_warning(table.full_name, cast(str, message)) return [] except lark.exceptions.UnexpectedCharacters as e: LOGGER.debug(f"Fail to parse expression {table.expression}", exc_info=e) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index d787a67d8d225..12a216f838b90 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -1,17 +1,15 @@ import logging from abc import ABC, abstractmethod -from typing import Dict, Optional, List, cast, Tuple, Type, Any - -from lark import Tree - from dataclasses import dataclass from enum import Enum +from typing import Any, Dict, List, Optional, Tuple, Type, cast + +from lark import Tree from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport +from datahub.ingestion.source.powerbi.m_query import native_sql_parser, tree_function from datahub.ingestion.source.powerbi.proxy import PowerBiAPI -from datahub.ingestion.source.powerbi.m_query import tree_function, native_sql_parser - LOGGER = logging.getLogger(__name__) @@ -30,24 +28,20 @@ class DataPlatformTable: class SupportedDataPlatform(Enum): POSTGRES_SQL = DataPlatformPair( - powerbi_data_platform_name="PostgreSQL", - datahub_data_platform_name="postgres" - ) + powerbi_data_platform_name="PostgreSQL", datahub_data_platform_name="postgres" + ) ORACLE = DataPlatformPair( - powerbi_data_platform_name="Oracle", - datahub_data_platform_name="oracle" - ) + powerbi_data_platform_name="Oracle", datahub_data_platform_name="oracle" + ) SNOWFLAKE = DataPlatformPair( - powerbi_data_platform_name="Snowflake", - datahub_data_platform_name="snowflake" - ) + powerbi_data_platform_name="Snowflake", datahub_data_platform_name="snowflake" + ) MS_SQL = DataPlatformPair( - powerbi_data_platform_name="Sql", - datahub_data_platform_name="mssql" - ) + powerbi_data_platform_name="Sql", datahub_data_platform_name="mssql" + ) class AbstractTableFullNameCreator(ABC): @@ -74,7 +68,6 @@ def __init__( self.table = table self.parse_tree = parse_tree self.reporter = reporter - self.specific_resolver = {} @abstractmethod def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: @@ -84,16 +77,20 @@ def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: class BaseMQueryResolver(AbstractDataAccessMQueryResolver, ABC): @staticmethod def get_item_selector_tokens( - expression_tree: Tree + expression_tree: Tree, ) -> Tuple[Optional[str], Optional[Dict[str, str]]]: - item_selector: Optional[Tree] = tree_function.first_item_selector_func(expression_tree) + item_selector: Optional[Tree] = tree_function.first_item_selector_func( + expression_tree + ) if item_selector is None: LOGGER.debug("Item Selector not found in tree") LOGGER.debug(expression_tree.pretty()) return None, None - identifier_tree: Optional[Tree] = 
tree_function.first_identifier_func(expression_tree) + identifier_tree: Optional[Tree] = tree_function.first_identifier_func( + expression_tree + ) if identifier_tree is None: LOGGER.debug("Identifier not found in tree") LOGGER.debug(item_selector.pretty()) @@ -101,7 +98,9 @@ def get_item_selector_tokens( # remove whitespaces and quotes from token tokens: List[str] = tree_function.strip_char_from_list( - tree_function.remove_whitespaces_from_list(tree_function.token_values(cast(Tree, item_selector))), + tree_function.remove_whitespaces_from_list( + tree_function.token_values(cast(Tree, item_selector)) + ), '"', ) identifier: List[str] = tree_function.token_values( @@ -113,12 +112,16 @@ def get_item_selector_tokens( return identifier[0], dict(zip(iterator, iterator)) def get_argument_list(self, variable_statement: Tree) -> Optional[Tree]: - expression_tree: Optional[Tree] = tree_function.first_expression_func(variable_statement) + expression_tree: Optional[Tree] = tree_function.first_expression_func( + variable_statement + ) if expression_tree is None: LOGGER.debug("First expression rule not found in input tree") return None - argument_list: Optional[Tree] = tree_function.first_arg_list_func(expression_tree) + argument_list: Optional[Tree] = tree_function.first_arg_list_func( + expression_tree + ) if argument_list is None: LOGGER.debug("First argument-list rule not found in input tree") return None @@ -128,7 +131,11 @@ def get_argument_list(self, variable_statement: Tree) -> Optional[Tree]: def make_token_dict(self, identifier: str) -> Dict[str, Any]: token_dict: Dict[str, Any] = {} - def fill_token_dict(identifier: str, supported_data_access_func: List[str], t_dict: Dict[str, Any]) -> None: + def fill_token_dict( + identifier: str, + supported_data_access_func: List[str], + t_dict: Dict[str, Any], + ) -> None: """ 1) Find statement where identifier appear in the left-hand side i.e. identifier = expression 2) Check expression is function invocation i.e. 
invoke_expression or item_selector @@ -155,12 +162,16 @@ def fill_token_dict(identifier: str, supported_data_access_func: List[str], t_di ) return None - expression_tree: Optional[Tree] = tree_function.first_expression_func(v_statement) + expression_tree: Optional[Tree] = tree_function.first_expression_func( + v_statement + ) if expression_tree is None: LOGGER.debug("Expression tree not found") LOGGER.debug(v_statement.pretty()) return None - invoke_expression: Optional[Tree] = tree_function.first_invoke_expression_func(expression_tree) + invoke_expression: Optional[ + Tree + ] = tree_function.first_invoke_expression_func(expression_tree) if invoke_expression is not None: letter_tree: Tree = invoke_expression.children[0] data_access_func: str = tree_function.make_function_name(letter_tree) @@ -175,49 +186,61 @@ def fill_token_dict(identifier: str, supported_data_access_func: List[str], t_di ) return - first_arg_tree: Optional[Tree] = tree_function.first_arg_list_func(invoke_expression) + first_arg_tree: Optional[Tree] = tree_function.first_arg_list_func( + invoke_expression + ) if first_arg_tree is None: - LOGGER.debug("Function invocation without argument in expression = %s", invoke_expression.pretty()) + LOGGER.debug( + "Function invocation without argument in expression = %s", + invoke_expression.pretty(), + ) self.reporter.report_warning( f"{self.table.full_name}-variable-statement", - f"Function invocation without argument", + "Function invocation without argument", ) return None - type_expression: Optional[Tree] = tree_function.first_type_expression_func(first_arg_tree) + type_expression: Optional[ + Tree + ] = tree_function.first_type_expression_func(first_arg_tree) if type_expression is None: - LOGGER.debug("Type expression not found in expression = %s", first_arg_tree.pretty()) + LOGGER.debug( + "Type expression not found in expression = %s", + first_arg_tree.pretty(), + ) self.reporter.report_warning( f"{self.table.full_name}-variable-statement", - f"Type expression not found", + "Type expression not found", ) return None tokens: List[str] = tree_function.token_values(type_expression) if len(tokens) != 1: - LOGGER.debug("type-expression has more than one identifier = %s", type_expression.pretty()) + LOGGER.debug( + "type-expression has more than one identifier = %s", + type_expression.pretty(), + ) self.reporter.report_warning( f"{self.table.full_name}-variable-statement", - f"Unsupported type expression", + "Unsupported type expression", ) return None new_identifier: str = tokens[0] fill_token_dict(new_identifier, supported_data_access_func, t_dict) else: - new_identifier, key_vs_value = self.get_item_selector_tokens( - tree_function.first_expression_func(expression_tree) + new_identifier, key_vs_value = self.get_item_selector_tokens( # type: ignore + cast(Tree, tree_function.first_expression_func(expression_tree)) ) current_selector: Dict[str, Any] = { f"{new_identifier}": { "item_selectors": [ - { - "items": key_vs_value, - "assigned_to": identifier - } + {"items": key_vs_value, "assigned_to": identifier} ], **t_dict, } } - fill_token_dict(new_identifier, supported_data_access_func, current_selector) + fill_token_dict( + new_identifier, supported_data_access_func, current_selector + ) fill_token_dict(identifier, SupportedResolver.get_function_names(), {}) @@ -226,7 +249,9 @@ def fill_token_dict(identifier: str, supported_data_access_func: List[str], t_di def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: data_platform_tables: List[DataPlatformTable] = [] 
- output_variable: Optional[str] = tree_function.get_output_variable(self.parse_tree) + output_variable: Optional[str] = tree_function.get_output_variable( + self.parse_tree + ) if output_variable is None: self.reporter.report_warning( f"{self.table.full_name}-output-variable", @@ -240,20 +265,27 @@ def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: for data_access_func in token_dict.keys(): supported_resolver = SupportedResolver.get_resolver(data_access_func) if supported_resolver is None: - LOGGER.debug("Resolver not found for the data-access-function %s", data_access_func) + LOGGER.debug( + "Resolver not found for the data-access-function %s", + data_access_func, + ) self.reporter.report_warning( f"{self.table.full_name}-data-access-function", - f"Resolver not found for data-access-function = {data_access_func}" + f"Resolver not found for data-access-function = {data_access_func}", ) continue - table_full_name_creator: AbstractTableFullNameCreator = supported_resolver.get_table_full_name_creator()() - for table_full_name in table_full_name_creator.get_full_table_names(token_dict): + table_full_name_creator: AbstractTableFullNameCreator = ( + supported_resolver.get_table_full_name_creator()() + ) + for table_full_name in table_full_name_creator.get_full_table_names( + token_dict + ): data_platform_tables.append( DataPlatformTable( name=table_full_name.split(".")[-1], full_name=table_full_name, - data_platform_pair=table_full_name_creator.get_platform_pair() + data_platform_pair=table_full_name_creator.get_platform_pair(), ) ) @@ -278,9 +310,11 @@ def two_level_access_pattern(self, token_dict: Dict[str, Any]) -> List[str]: for data_access_function in token_dict: arguments: List[str] = tree_function.strip_char_from_list( values=tree_function.remove_whitespaces_from_list( - tree_function.token_values(token_dict[data_access_function]["arg_list"]) - ), - char="\"" + tree_function.token_values( + token_dict[data_access_function]["arg_list"] + ) + ), + char='"', ) # delete arg_list as we consumed it and don't want to process it in next step if len(arguments) != 2: @@ -295,9 +329,7 @@ def two_level_access_pattern(self, token_dict: Dict[str, Any]) -> List[str]: for schema in source_dict["item_selectors"]: schema_name: str = schema["items"]["Schema"] table_name: str = schema["items"]["Item"] - full_table_names.append( - f"{db_name}.{schema_name}.{table_name}" - ) + full_table_names.append(f"{db_name}.{schema_name}.{table_name}") LOGGER.debug("PostgreSQL full-table-names = %s", full_table_names) @@ -322,9 +354,9 @@ def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: arguments: List[str] = tree_function.strip_char_from_list( values=tree_function.remove_whitespaces_from_list( - tree_function.token_values(data_access_dict["arg_list"]) - ), - char="\"" + tree_function.token_values(data_access_dict["arg_list"]) + ), + char='"', ) if len(arguments) == 2: @@ -361,9 +393,7 @@ def _get_db_name(self, value: str) -> Optional[str]: error_message: str = f"The target argument ({value}) should in the format of :/[.]" splitter_result: List[str] = value.split("/") if len(splitter_result) != 2: - self.reporter.report_warning( - f"{self.table.full_name}-oracle-target", error_message - ) + LOGGER.debug(error_message) return None db_name = splitter_result[1].split(".")[0] @@ -377,7 +407,8 @@ def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: for data_access_function in token_dict: arguments: List[str] = tree_function.remove_whitespaces_from_list( - 
tree_function.token_values(token_dict[data_access_function]["arg_list"])) + tree_function.token_values(token_dict[data_access_function]["arg_list"]) + ) # delete arg_list as we consumed it and don't want to process it in next step del token_dict[data_access_function]["arg_list"] @@ -391,7 +422,9 @@ def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: for schema in source_dict["item_selectors"]: schema_name: str = schema["items"]["Schema"] for item_selectors in source_dict[schema["assigned_to"]]: - for item_selector in source_dict[schema["assigned_to"]][item_selectors]: + for item_selector in source_dict[schema["assigned_to"]][ + item_selectors + ]: table_name: str = item_selector["items"]["Name"] full_table_names.append( f"{db_name}.{schema_name}.{table_name}" @@ -415,13 +448,15 @@ def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: for source in data_access_dict: for db_its in data_access_dict[source]["item_selectors"]: db_name: str = db_its["items"]["Name"] - for schema_its in data_access_dict[source][db_its["assigned_to"]]["item_selectors"]: + for schema_its in data_access_dict[source][db_its["assigned_to"]][ + "item_selectors" + ]: schema_name: str = schema_its["items"]["Name"] - for table_its in data_access_dict[source][db_its["assigned_to"]][schema_its["assigned_to"]]["item_selectors"]: + for table_its in data_access_dict[source][db_its["assigned_to"]][ + schema_its["assigned_to"] + ]["item_selectors"]: table_name: str = table_its["items"]["Name"] - full_table_names.append( - f"{db_name}.{schema_name}.{table_name}" - ) + full_table_names.append(f"{db_name}.{schema_name}.{table_name}") LOGGER.debug("Snowflake full-table-name %s", full_table_names) @@ -435,37 +470,49 @@ def get_platform_pair(self) -> DataPlatformPair: def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: full_table_names: List[str] = [] data_access_dict: Dict[str, Any] = list(token_dict.values())[0] - t1: Tree = tree_function.first_arg_list_func(data_access_dict["arg_list"]) + t1: Tree = cast( + Tree, tree_function.first_arg_list_func(data_access_dict["arg_list"]) + ) flat_argument_list: List[Tree] = tree_function.flat_argument_list(t1) if len(flat_argument_list) != 2: - LOGGER.debug("Expecting 2 argument, actual argument count is %s", len(flat_argument_list)) + LOGGER.debug( + "Expecting 2 argument, actual argument count is %s", + len(flat_argument_list), + ) LOGGER.debug("Flat argument list = %s", flat_argument_list) return full_table_names data_access_tokens: List[str] = tree_function.remove_whitespaces_from_list( tree_function.token_values(flat_argument_list[0]) ) - if data_access_tokens[0] != SupportedDataPlatform.SNOWFLAKE.value.powerbi_data_platform_name: - LOGGER.debug("Provided native-query data-platform = %s", data_access_tokens[0]) + if ( + data_access_tokens[0] + != SupportedDataPlatform.SNOWFLAKE.value.powerbi_data_platform_name + ): + LOGGER.debug( + "Provided native-query data-platform = %s", data_access_tokens[0] + ) LOGGER.debug("Only Snowflake is supported in NativeQuery") return full_table_names # First argument is the query sql_query: str = tree_function.strip_char_from_list( values=tree_function.remove_whitespaces_from_list( - tree_function.token_values(flat_argument_list[1]) - ), - char="\"" - - )[0] # Remove any whitespaces and double quotes character + tree_function.token_values(flat_argument_list[1]) + ), + char='"', + )[ + 0 + ] # Remove any whitespaces and double quotes character for table in native_sql_parser.get_tables(sql_query): 
if len(table.split(".")) != 3: - LOGGER.debug("Skipping table (%s) as it is not as per full_table_name format", table) - full_table_names.append( - table - ) + LOGGER.debug( + "Skipping table (%s) as it is not as per full_table_name format", + table, + ) + full_table_names.append(table) return full_table_names @@ -514,9 +561,7 @@ def get_function_name(self) -> str: def get_function_names() -> List[str]: functions: List[str] = [] for supported_resolver in SupportedResolver: - functions.append( - supported_resolver.get_function_name() - ) + functions.append(supported_resolver.get_function_name()) return functions diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py index f13688c1bd84a..b6ab6b5261cf3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py @@ -1,7 +1,6 @@ import logging -from typing import Optional, List, Union, cast, Any - from functools import partial +from typing import Any, List, Optional, Union, cast from lark import Token, Tree @@ -136,12 +135,14 @@ def get_all_function_name(tree: Tree) -> List[str]: def flat_argument_list(tree: Tree) -> List[Tree]: - values: List[str] = [] + values: List[Tree] = [] for child in tree.children: if isinstance(child, Token): continue - if isinstance(child, Tree) and (child.data == "argument_list" or child.data == "expression"): + if isinstance(child, Tree) and ( + child.data == "argument_list" or child.data == "expression" + ): values.append(child) return values @@ -155,5 +156,3 @@ def flat_argument_list(tree: Tree) -> List[Tree]: first_identifier_func = partial(get_first_rule, rule="identifier") first_invoke_expression_func = partial(get_first_rule, rule="invoke_expression") first_type_expression_func = partial(get_first_rule, rule="type_expression") - - diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py index 02edab6dac758..abe7d0e46b05a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py @@ -1,15 +1,16 @@ import logging +from typing import List, Optional, Tuple -from datahub.ingestion.source.powerbi.m_query import tree_function -from datahub.ingestion.source.powerbi.m_query import resolver - -from typing import List, Tuple, Optional, Set from lark import Tree +from datahub.ingestion.source.powerbi.m_query import resolver, tree_function + LOGGER = logging.getLogger(__name__) -def any_one_should_present(supported_funcs: List[str], functions: List[str]) -> Tuple[bool, Optional[str]]: +def any_one_should_present( + supported_funcs: List[str], functions: List[str] +) -> Tuple[bool, Optional[str]]: """ Anyone functions from supported_funcs should present in functions list :param supported_funcs: List of function m_query module supports @@ -23,7 +24,9 @@ def any_one_should_present(supported_funcs: List[str], functions: List[str]) -> return False, f"Function from supported function list {supported_funcs} not found" -def all_function_should_be_known(supported_funcs: List[str], functions: List[str]) -> Tuple[bool, Optional[str]]: +def all_function_should_be_known( + supported_funcs: List[str], functions: List[str] +) -> Tuple[bool, Optional[str]]: for f in functions: 
if f not in supported_funcs: return False, f"Function {f} is unknown" @@ -31,7 +34,9 @@ def all_function_should_be_known(supported_funcs: List[str], functions: List[str return True, None -def validate_parse_tree(tree: Tree, native_query_enabled: bool = True) -> Tuple[bool, str]: +def validate_parse_tree( + tree: Tree, native_query_enabled: bool = True +) -> Tuple[bool, Optional[str]]: """ :param tree: tree to validate as per functions supported by m_parser module :param native_query_enabled: Whether user want to extract lineage from native query @@ -44,6 +49,6 @@ def validate_parse_tree(tree: Tree, native_query_enabled: bool = True) -> Tuple[ if native_query_enabled is False: if resolver.FunctionName.NATIVE_QUERY.value in functions: - return False, f"Lineage extraction from native query is disabled." + return False, "Lineage extraction from native query is disabled." return True, None diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index b573956b0fd7b..b0fca163ed000 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -27,9 +27,7 @@ PowerBiDashboardSourceConfig, PowerBiDashboardSourceReport, ) - -from datahub.ingestion.source.powerbi.m_query import resolver -from datahub.ingestion.source.powerbi.m_query import parser +from datahub.ingestion.source.powerbi.m_query import parser, resolver from datahub.ingestion.source.powerbi.proxy import PowerBiAPI from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps from datahub.metadata.schema_classes import ( @@ -162,17 +160,25 @@ def __to_datahub_dataset( if self.__config.extract_lineage is True: # Check if upstreams table is available, parse them and create dataset URN for each upstream table upstreams: List[UpstreamClass] = [] - upstream_tables: List[resolver.DataPlatformTable] = parser.get_upstream_tables( - table, self.__reporter - ) + upstream_tables: List[ + resolver.DataPlatformTable + ] = parser.get_upstream_tables(table, self.__reporter) for upstream_table in upstream_tables: - if upstream_table.data_platform_pair.powerbi_data_platform_name not in self.__config.dataset_type_mapping[upstream_table.platform_type]: + if ( + upstream_table.data_platform_pair.powerbi_data_platform_name + not in self.__config.dataset_type_mapping.keys() + ): + LOGGER.debug("Skipping upstream table for %s", ds_urn) continue platform: Union[ str, PlatformDetail - ] = self.__config.dataset_type_mapping[upstream_table.platform_type] - platform_name: str = upstream_table.data_platform_pair.datahub_data_platform_name + ] = self.__config.dataset_type_mapping[ + upstream_table.data_platform_pair.powerbi_data_platform_name + ] + platform_name: str = ( + upstream_table.data_platform_pair.datahub_data_platform_name + ) platform_instance_name: Optional[str] = None platform_env: str = DEFAULT_ENV # Determine if PlatformDetail is provided @@ -731,10 +737,12 @@ def create(cls, config_dict, ctx): return cls(config, ctx) def validate_dataset_type_mapping(self): - powerbi_data_platforms: List[str] = [data_platform.get_data_platform_pair().powerbi_data_platform_name for data_platform - in resolver.SupportedDataPlatform] + powerbi_data_platforms: List[str] = [ + data_platform.value.powerbi_data_platform_name + for data_platform in resolver.SupportedDataPlatform + ] - for key in self.source_config.keys(): + for key in 
self.source_config.dataset_type_mapping.keys(): if key not in powerbi_data_platforms: raise ValueError(f"PowerBI DataPlatform {key} is not supported") diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 8b42a924dab05..ac3fec0a6d303 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -2,14 +2,10 @@ from lark import Tree -from datahub.ingestion.source.powerbi.m_query import ( - parser, - tree_function -) from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport +from datahub.ingestion.source.powerbi.m_query import parser, tree_function from datahub.ingestion.source.powerbi.m_query.resolver import ( DataPlatformTable, - SupportedResolver, SupportedDataPlatform, ) from datahub.ingestion.source.powerbi.proxy import PowerBiAPI @@ -174,8 +170,8 @@ def test_oracle_regular_case(): assert data_platform_tables[0].name == "EMPLOYEES" assert data_platform_tables[0].full_name == "salesdb.HR.EMPLOYEES" assert ( - data_platform_tables[0].data_platform_pair.powerbi_data_platform_name - == SupportedDataPlatform.ORACLE.value.powerbi_data_platform_name + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.ORACLE.value.powerbi_data_platform_name ) @@ -236,8 +232,8 @@ def test_mssql_with_query(): assert data_platform_tables[0].name == expected_tables[index].split(".")[2] assert data_platform_tables[0].full_name == expected_tables[index] assert ( - data_platform_tables[0].data_platform_pair.powerbi_data_platform_name - == SupportedDataPlatform.MS_SQL.value.powerbi_data_platform_name + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.MS_SQL.value.powerbi_data_platform_name ) @@ -272,8 +268,8 @@ def test_snowflake_native_query(): assert data_platform_tables[0].name == expected_tables[index].split(".")[2] assert data_platform_tables[0].full_name == expected_tables[index] assert ( - data_platform_tables[0].data_platform_pair.powerbi_data_platform_name - == SupportedDataPlatform.SNOWFLAKE.value.powerbi_data_platform_name + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.SNOWFLAKE.value.powerbi_data_platform_name ) @@ -291,4 +287,3 @@ def test_native_query_disabled(): ) assert len(data_platform_tables) == 0 - From bb1dea32080f57b019d35aec58b972135411aa0e Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 19 Dec 2022 21:09:52 +0530 Subject: [PATCH 29/53] flag for switching native query --- .../src/datahub/ingestion/source/powerbi/config.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index 5d6c3dc0529d7..55e8c92c423e5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -118,13 +118,17 @@ class PowerBiAPIConfig(EnvBasedSourceConfigBase): extract_ownership: bool = pydantic.Field( default=True, description="Whether ownership should be ingested" ) + # Enable/Disable extracting report information + extract_reports: bool = pydantic.Field( + default=True, description="Whether reports should be ingested" + ) # Enable/Disable extracting lineage information of PowerBI Dataset extract_lineage: bool = 
pydantic.Field( default=True, description="Whether lineage should be ingested" ) - # Enable/Disable extracting report information - extract_reports: bool = pydantic.Field( - default=True, description="Whether reports should be ingested" + # Enable/Disable extracting lineage information from PowerBI Native query + native_query_parsing: bool = pydantic.Field( + default=True, description="Whether PowerBI native query should be parsed to extract lineage" ) @validator("dataset_type_mapping") From 788be4e7e4cfcb5af595e01c30b587f3e9e198b1 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Tue, 20 Dec 2022 12:03:33 +0530 Subject: [PATCH 30/53] update test-cases --- .../source/powerbi/m_query/resolver.py | 2 +- .../golden_test_disabled_ownership.json | 114 ++++++++- .../powerbi/golden_test_ingest.json | 114 ++++++++- .../powerbi/golden_test_report.json | 230 +++++++++++++++++- .../integration/powerbi/test_m_parser.py | 2 +- .../tests/integration/powerbi/test_powerbi.py | 108 +++++++- 6 files changed, 557 insertions(+), 13 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index 12a216f838b90..a04ff735b9860 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -74,7 +74,7 @@ def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: pass -class BaseMQueryResolver(AbstractDataAccessMQueryResolver, ABC): +class MQueryResolver(AbstractDataAccessMQueryResolver, ABC): @staticmethod def get_item_selector_tokens( expression_tree: Tree, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json index 2154e4d7c2b56..2aeedb1c44090 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json @@ -27,13 +27,125 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"SNOWFLAKE_TESTTABLE\", \"description\": \"SNOWFLAKE_TESTTABLE\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query\", \"description\": \"snowflake native-query\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"job-history\", \"description\": \"job-history\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"postgres_test_table\", \"description\": \"postgres_test_table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}]}", + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": 
\"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json index 331e4fde518dd..094b612b17299 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json @@ -27,6 +27,118 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"SNOWFLAKE_TESTTABLE\", \"description\": \"SNOWFLAKE_TESTTABLE\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query\", \"description\": \"snowflake native-query\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"job-history\", \"description\": \"job-history\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"postgres_test_table\", \"description\": 
\"postgres_test_table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "corpuser", "entityUrn": "urn:li:corpuser:users.User1@foo.com", @@ -117,7 +229,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}]}", + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json index cfafce5d452a5..1f01a5206d8de 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json @@ -27,6 +27,118 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"SNOWFLAKE_TESTTABLE\", \"description\": \"SNOWFLAKE_TESTTABLE\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query\", \"description\": \"snowflake native-query\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"job-history\", \"description\": \"job-history\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"postgres_test_table\", \"description\": \"postgres_test_table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "corpuser", "entityUrn": "urn:li:corpuser:users.User1@foo.com", @@ -117,7 +229,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}]}", + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": 
\"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { @@ -251,6 +363,118 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"SNOWFLAKE_TESTTABLE\", \"description\": \"SNOWFLAKE_TESTTABLE\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query\", \"description\": \"snowflake native-query\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"job-history\", \"description\": \"job-history\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"postgres_test_table\", \"description\": \"postgres_test_table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "corpuser", "entityUrn": "urn:li:corpuser:users.User1@foo.com", @@ -341,7 +565,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"order\": \"0\"}, \"title\": \"ReportSection\", \"description\": \"Regional Sales Analysis\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}]}", + "value": "{\"customProperties\": {\"order\": \"0\"}, \"title\": \"ReportSection\", \"description\": \"Regional Sales Analysis\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { @@ -369,7 +593,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"order\": \"1\"}, \"title\": \"ReportSection1\", \"description\": \"Geographic Analysis\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}]}", + "value": "{\"customProperties\": {\"order\": \"1\"}, \"title\": \"ReportSection1\", \"description\": \"Geographic Analysis\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": 
\"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index ac3fec0a6d303..cea52c6703bb1 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -275,7 +275,7 @@ def test_snowflake_native_query(): def test_native_query_disabled(): table: PowerBiAPI.Table = PowerBiAPI.Table( - expression=M_QUERIES[1], + expression=M_QUERIES[1], # 1st index has the native query name="virtual_order_table", full_name="OrderDataSet.virtual_order_table", ) diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index acaa2fb77307c..56749dc56971b 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -106,6 +106,15 @@ def register_mock_api(request_mock): "webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445", }, }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed": { + "method": "GET", + "status_code": 200, + "json": { + "id": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "name": "hr_pbi_test", + "webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed", + }, + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/datasources": { "method": "GET", "status_code": 200, @@ -141,6 +150,7 @@ def register_mock_api(request_mock): "datasets": [ { "id": "05169CD2-E713-41E6-9600-1D8066D95445", + "name": "test_sf_pbi_test", "tables": [ { "name": "public issue_history", @@ -154,9 +164,95 @@ def register_mock_api(request_mock): "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", } ], - } + }, + { + "name": "SNOWFLAKE_TESTTABLE", + "source": [ + { + "expression": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", + } + ], + }, + { + "name": "snowflake native-query", + "source": [ + { + "expression": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = 
\"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", + } + ], + }, + { + "name": "job-history", + "source": [ + { + "expression": 'let\n Source = Oracle.Database("localhost:1521/salesdb.GSLAB.COM", [HierarchicalNavigation=true]), HR = Source{[Schema="HR"]}[Data], EMPLOYEES1 = HR{[Name="EMPLOYEES"]}[Data] \n in EMPLOYEES1', + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", + } + ], + }, + { + "name": "postgres_test_table", + "source": [ + { + "expression": 'let\n Source = PostgreSQL.Database("localhost" , "mics" ),\n public_order_date = Source{[Schema="public",Item="order_date"]}[Data] \n in \n public_order_date', + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", + } + ], + }, + ], - } + }, + { + "id": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + "name": "hr_pbi_test", + "tables": [ + { + "name": "dbo_book_issue", + "source": [ + { + "expression": 'let\n Source = Sql.Database("localhost", "library"),\n dbo_book_issue = Source{[Schema="dbo",Item="book_issue"]}[Data]\n in dbo_book_issue', + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", + } + ], + }, + { + "name": "ms_sql_native_table", + "source": [ + { + "expression": 'let\n Source = Sql.Database("AUPRDWHDB", "COMMOPSDB", [Query="select *,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTH_WID) as CD_AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_MANAGER_CLOSING_MONTH,\'-\',\'\'))), MONTH_WID) as AGENT_KEY#(lf)#(lf)from V_PS_CD_RETENTION", CommandTimeout=#duration(0, 1, 30, 0)]),\n #"Changed Type" = Table.TransformColumnTypes(Source,{{"mth_date", type date}}),\n #"Added Custom" = Table.AddColumn(#"Changed Type", "Month", each Date.Month([mth_date])),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "TPV Opening", each if [Month] = 1 then [TPV_AMV_OPENING]\nelse if [Month] = 2 then 0\nelse if [Month] = 3 then 0\nelse if [Month] = 4 then [TPV_AMV_OPENING]\nelse if [Month] = 5 then 0\nelse if [Month] = 6 then 0\nelse if [Month] = 7 then [TPV_AMV_OPENING]\nelse if [Month] = 8 then 0\nelse if [Month] = 9 then 0\nelse if [Month] = 10 then [TPV_AMV_OPENING]\nelse if [Month] = 11 then 0\nelse if [Month] = 12 then 0\n\nelse 0)\nin\n #"Added Custom1"', + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", + } + ], + }, + + ], + }, ], }, ] @@ -221,6 +317,7 @@ def default_source_config(): "tenant_id": "0B0C960B-FCDF-4D0F-8C45-2E03BB59DDEB", "workspace_id": "64ED5CAD-7C10-4684-8180-826122881108", "extract_lineage": False, + "extract_reports": False, "dataset_type_mapping": { "PostgreSql": "postgres", "Oracle": "oracle", @@ -243,7 +340,6 @@ def test_powerbi_ingest(mock_msal, pytestconfig, tmp_path, 
mock_time, requests_m "type": "powerbi", "config": { **default_source_config(), - "extract_reports": False, }, }, "sink": { @@ -283,7 +379,6 @@ def test_override_ownership( "config": { **default_source_config(), "extract_ownership": False, - "extract_reports": False, }, }, "sink": { @@ -320,6 +415,7 @@ def test_extract_reports(mock_msal, pytestconfig, tmp_path, mock_time, requests_ "type": "powerbi", "config": { **default_source_config(), + "extract_reports": True, }, }, "sink": { @@ -333,10 +429,10 @@ def test_extract_reports(mock_msal, pytestconfig, tmp_path, mock_time, requests_ pipeline.run() pipeline.raise_from_status() - mce_out_file = "golden_test_report.json" + golden_file = "golden_test_report.json" mce_helpers.check_golden_file( pytestconfig, output_path=tmp_path / "powerbi_report_mces.json", - golden_path=f"{test_resources_dir}/{mce_out_file}", + golden_path=f"{test_resources_dir}/{golden_file}", ) From b7dc3cb3e83fd518f17f005b4107f4fd4618700b Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Tue, 20 Dec 2022 13:01:47 +0530 Subject: [PATCH 31/53] lineage test --- metadata-ingestion/setup.py | 1 + .../source/powerbi/m_query/parser.py | 2 +- .../powerbi/golden_test_lineage.json | 366 ++++++++++++++++++ .../tests/integration/powerbi/test_powerbi.py | 37 ++ 4 files changed, 405 insertions(+), 1 deletion(-) create mode 100644 metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index de339b99a824b..849641d59ec64 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -609,6 +609,7 @@ def get_long_description(): "datahub.metadata": ["schema.avsc"], "datahub.metadata.schemas": ["*.avsc"], "datahub.ingestion.source.feast_image": ["Dockerfile", "requirements.txt"], + "datahub.ingestion.source.powerbi": ["powerbi-lexical-grammar.rule"] }, entry_points=entry_points, # Dependencies. 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py index 1c4b674d5ef05..1731fa250e0dd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py @@ -57,7 +57,7 @@ def get_upstream_tables( ) return [] - return resolver.BaseMQueryResolver( + return resolver.MQueryResolver( table=table, parse_tree=parse_tree, reporter=reporter, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json new file mode 100644 index 0000000000000..4ba7ae84d72d2 --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json @@ -0,0 +1,366 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"public issue_history\", \"description\": \"public issue_history\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"SNOWFLAKE_TESTTABLE\", \"description\": \"SNOWFLAKE_TESTTABLE\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query\", \"description\": \"snowflake native-query\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"job-history\", \"description\": \"job-history\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:oracle,salesdb.HR.EMPLOYEES,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"postgres_test_table\", \"description\": \"postgres_test_table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,mics.public.order_date,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "value": "{\"active\": true, \"displayName\": \"user1\", \"email\": \"User1@foo.com\", \"title\": \"user1\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": 
"urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "value": "{\"username\": \"User1@foo.com\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "value": "{\"active\": true, \"displayName\": \"user2\", \"email\": \"User2@foo.com\", \"title\": \"user2\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "value": "{\"username\": \"User2@foo.com\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"chartId\": \"powerbi.linkedin.com/charts/B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": 
"dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "value": "{\"paths\": [\"/powerbi/demo-workspace\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "value": "{\"customProperties\": {\"chartCount\": \"1\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:users.User1@foo.com\", \"type\": \"NONE\"}, {\"owner\": \"urn:li:corpuser:users.User2@foo.com\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 56749dc56971b..22a2c23c05980 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -436,3 +436,40 @@ def test_extract_reports(mock_msal, pytestconfig, tmp_path, mock_time, requests_ output_path=tmp_path / "powerbi_report_mces.json", golden_path=f"{test_resources_dir}/{golden_file}", ) + + +@freeze_time(FROZEN_TIME) +@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) +def test_extract_lineage(mock_msal, pytestconfig, tmp_path, mock_time, requests_mock): + test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" + + register_mock_api(request_mock=requests_mock) + + 
pipeline = Pipeline.create( + { + "run_id": "powerbi-lineage-test", + "source": { + "type": "powerbi", + "config": { + **default_source_config(), + "extract_lineage": True, + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/powerbi_lineage_mces.json", + }, + }, + } + ) + + pipeline.run() + pipeline.raise_from_status() + golden_file = "golden_test_lineage.json" + + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "powerbi_lineage_mces.json", + golden_path=f"{test_resources_dir}/{golden_file}", + ) \ No newline at end of file From 3656cc065b1808587cc661b1ec6bceb918f6d903 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Tue, 20 Dec 2022 13:45:50 +0530 Subject: [PATCH 32/53] platform instance --- .../powerbi/golden_test_lineage.json | 32 +++++++++++++++++-- .../tests/integration/powerbi/test_powerbi.py | 15 +++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json index 4ba7ae84d72d2..45b92dee88075 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json @@ -55,6 +55,20 @@ "runId": "powerbi-lineage-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.PBI_TEST.TEST.TESTTABLE,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", @@ -83,6 +97,20 @@ "runId": "powerbi-lineage-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", @@ -117,7 +145,7 @@ "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:oracle,salesdb.HR.EMPLOYEES,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:oracle,high_performance_production_unit.salesdb.HR.EMPLOYEES,PROD)\", \"type\": \"TRANSFORMED\"}]}", "contentType": "application/json" }, "systemMetadata": { @@ -159,7 +187,7 @@ "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { - 
"value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,mics.public.order_date,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,operational_instance.mics.public.order_date,PROD)\", \"type\": \"TRANSFORMED\"}]}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 22a2c23c05980..e6f112b8d0ef9 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -453,6 +453,21 @@ def test_extract_lineage(mock_msal, pytestconfig, tmp_path, mock_time, requests_ "config": { **default_source_config(), "extract_lineage": True, + "dataset_type_mapping": { + "PostgreSql": { + "platform_instance": "operational_instance" + }, + "Oracle": { + "platform_instance": "high_performance_production_unit" + }, + "Sql": { + "platform_instance": "reporting-db" + }, + "Snowflake": { + "platform_instance": "sn-2" + }, + }, + }, }, "sink": { From 1433b605f6a0f89b7f67c98250f5600e80500a11 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Tue, 20 Dec 2022 14:09:55 +0530 Subject: [PATCH 33/53] integration test --- .../golden_test_disabled_ownership.json | 102 +++++++++++++- .../powerbi/golden_test_ingest.json | 102 +++++++++++++- .../powerbi/golden_test_lineage.json | 128 +++++++++++++++++- .../powerbi/golden_test_report.json | 102 +++++++++++++- .../tests/integration/powerbi/test_powerbi.py | 6 + 5 files changed, 433 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json index 2aeedb1c44090..f913484fb85f9 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json @@ -139,6 +139,62 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"dbo_book_issue\", \"description\": \"dbo_book_issue\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"ms_sql_native_table\", \"description\": \"ms_sql_native_table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", @@ -181,6 +237,48 @@ "runId": "powerbi-test" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "value": "{\"customProperties\": {\"datasetId\": \"ba0130a1-5b03-40de-9535-b34e778ea6ed\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"yearly_sales\", \"description\": \"yearly_sales\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"chartId\": \"powerbi.linkedin.com/charts/23212598-23b5-4980-87cc-5fc0ecd84385\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", @@ -201,7 +299,7 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"chartCount\": \"1\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", + "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": 
\"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", "contentType": "application/json" }, "systemMetadata": { @@ -237,4 +335,4 @@ "runId": "powerbi-test" } } -] +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json index 094b612b17299..c89ba31b30a2f 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json @@ -139,6 +139,62 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"dbo_book_issue\", \"description\": \"dbo_book_issue\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"ms_sql_native_table\", \"description\": \"ms_sql_native_table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "corpuser", "entityUrn": "urn:li:corpuser:users.User1@foo.com", @@ -265,6 +321,48 @@ "runId": "powerbi-test" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "value": "{\"customProperties\": {\"datasetId\": \"ba0130a1-5b03-40de-9535-b34e778ea6ed\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"yearly_sales\", \"description\": \"yearly_sales\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": 
"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"chartId\": \"powerbi.linkedin.com/charts/23212598-23b5-4980-87cc-5fc0ecd84385\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", @@ -285,7 +383,7 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"chartCount\": \"1\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", + "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", "contentType": "application/json" }, "systemMetadata": { @@ -335,4 +433,4 @@ "runId": "powerbi-test" } } -] +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json index 45b92dee88075..85fce7f7d4394 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json @@ -195,6 +195,90 @@ "runId": "powerbi-lineage-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"dbo_book_issue\", \"description\": \"dbo_book_issue\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.library.dbo.book_issue,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"ms_sql_native_table\", \"description\": \"ms_sql_native_table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:mssql,reporting-db.COMMOPSDB.dbo.V_PS_CD_RETENTION,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, { "entityType": "corpuser", "entityUrn": "urn:li:corpuser:users.User1@foo.com", @@ -321,6 +405,48 @@ "runId": "powerbi-lineage-test" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "value": "{\"customProperties\": {\"datasetId\": \"ba0130a1-5b03-40de-9535-b34e778ea6ed\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"yearly_sales\", \"description\": \"yearly_sales\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "chart", + "entityUrn": 
"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"chartId\": \"powerbi.linkedin.com/charts/23212598-23b5-4980-87cc-5fc0ecd84385\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, { "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", @@ -341,7 +467,7 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"chartCount\": \"1\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", + "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json index 1f01a5206d8de..43707cec35e2e 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json @@ -139,6 +139,62 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"dbo_book_issue\", \"description\": \"dbo_book_issue\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"ms_sql_native_table\", \"description\": \"ms_sql_native_table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "corpuser", "entityUrn": "urn:li:corpuser:users.User1@foo.com", @@ -265,6 +321,48 @@ "runId": "powerbi-test" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "value": "{\"customProperties\": {\"datasetId\": \"ba0130a1-5b03-40de-9535-b34e778ea6ed\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"yearly_sales\", \"description\": \"yearly_sales\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"chartId\": \"powerbi.linkedin.com/charts/23212598-23b5-4980-87cc-5fc0ecd84385\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", @@ -285,7 +383,7 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"chartCount\": \"1\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", + "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, 
\"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", "contentType": "application/json" }, "systemMetadata": { @@ -699,4 +797,4 @@ "runId": "powerbi-test" } } -] +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index e6f112b8d0ef9..bb09f544309d3 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -94,6 +94,12 @@ def register_mock_api(request_mock): "embedUrl": "https://localhost/tiles/embed/1", "datasetId": "05169CD2-E713-41E6-9600-1D8066D95445", }, + { + "id": "23212598-23b5-4980-87cc-5fc0ecd84385", + "title": "yearly_sales", + "embedUrl": "https://localhost/tiles/embed/2", + "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", + } ] }, }, From 979b45753ffb29323b64d98a90578adc22a7f17c Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Tue, 20 Dec 2022 14:22:54 +0530 Subject: [PATCH 34/53] lint fix --- .../ingestion/source/powerbi/config.py | 3 ++- .../tests/integration/powerbi/test_powerbi.py | 23 ++++++------------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index 55e8c92c423e5..fd9725801549d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -128,7 +128,8 @@ class PowerBiAPIConfig(EnvBasedSourceConfigBase): ) # Enable/Disable extracting lineage information from PowerBI Native query native_query_parsing: bool = pydantic.Field( - default=True, description="Whether PowerBI native query should be parsed to extract lineage" + default=True, + description="Whether PowerBI native query should be parsed to extract lineage", ) @validator("dataset_type_mapping") diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index bb09f544309d3..a3e0dc99674ec 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -99,7 +99,7 @@ def register_mock_api(request_mock): "title": "yearly_sales", "embedUrl": "https://localhost/tiles/embed/2", "datasetId": "ba0130a1-5b03-40de-9535-b34e778ea6ed", - } + }, ] }, }, @@ -175,7 +175,7 @@ def register_mock_api(request_mock): "name": "SNOWFLAKE_TESTTABLE", "source": [ { - "expression": "let\n Source = Snowflake.Databases(\"hp123rt5.ap-southeast-2.fakecomputing.com\",\"PBI_TEST_WAREHOUSE_PROD\",[Role=\"PBI_TEST_MEMBER\"]),\n PBI_TEST_Database = Source{[Name=\"PBI_TEST\",Kind=\"Database\"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name=\"TEST\",Kind=\"Schema\"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name=\"TESTTABLE\",Kind=\"Table\"]}[Data]\nin\n TESTTABLE_Table", + "expression": 'let\n Source = Snowflake.Databases("hp123rt5.ap-southeast-2.fakecomputing.com","PBI_TEST_WAREHOUSE_PROD",[Role="PBI_TEST_MEMBER"]),\n PBI_TEST_Database = Source{[Name="PBI_TEST",Kind="Database"]}[Data],\n TEST_Schema = PBI_TEST_Database{[Name="TEST",Kind="Schema"]}[Data],\n TESTTABLE_Table = TEST_Schema{[Name="TESTTABLE",Kind="Table"]}[Data]\nin\n TESTTABLE_Table', } ], "datasourceUsages": [ @@ -188,7 +188,7 @@ def register_mock_api(request_mock): "name": "snowflake native-query", "source": [ { - "expression": 
"let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu20658.ap-southeast-2.snowflakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4\", null, [EnableFolding=true]),\n #\"Added Conditional Column\" = Table.AddColumn(Source, \"SME Units ENT\", each if [DEAL_TYPE] = \"SME Unit\" then [UNIT] else 0),\n #\"Added Conditional Column1\" = Table.AddColumn(#\"Added Conditional Column\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" then [UNIT] else 0),\n #\"Removed Columns\" = Table.RemoveColumns(#\"Added Conditional Column1\",{\"Banklink Units\"}),\n #\"Added Custom\" = Table.AddColumn(#\"Removed Columns\", \"Banklink Units\", each if [DEAL_TYPE] = \"Banklink\" and [SALES_TYPE] = \"3 - Upsell\"\nthen [UNIT]\n\nelse if [SALES_TYPE] = \"Adjusted BL Migration\"\nthen [UNIT]\n\nelse 0),\n #\"Added Custom1\" = Table.AddColumn(#\"Added Custom\", \"SME Units in $ (*$361)\", each if [DEAL_TYPE] = \"SME Unit\" \nand [SALES_TYPE] <> \"4 - Renewal\"\n then [UNIT] * 361\nelse 0),\n #\"Added Custom2\" = Table.AddColumn(#\"Added Custom1\", \"Banklink in $ (*$148)\", each [Banklink Units] * 148)\nin\n #\"Added Custom2\"", + "expression": 'let\n Source = Value.NativeQuery(Snowflake.Databases("bu20658.ap-southeast-2.snowflakecomputing.com","operations_analytics_warehouse_prod",[Role="OPERATIONS_ANALYTICS_MEMBER"]){[Name="OPERATIONS_ANALYTICS"]}[Data], "SELECT#(lf)concat((UPPER(REPLACE(SELLER,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,\'-\',\'\'))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4", null, [EnableFolding=true]),\n #"Added Conditional Column" = Table.AddColumn(Source, "SME Units ENT", each if [DEAL_TYPE] = "SME Unit" then [UNIT] else 0),\n #"Added Conditional Column1" = Table.AddColumn(#"Added Conditional Column", "Banklink Units", each if [DEAL_TYPE] = "Banklink" then [UNIT] else 0),\n #"Removed Columns" = Table.RemoveColumns(#"Added Conditional Column1",{"Banklink Units"}),\n #"Added Custom" = Table.AddColumn(#"Removed Columns", "Banklink Units", each if [DEAL_TYPE] = "Banklink" and [SALES_TYPE] = "3 - Upsell"\nthen [UNIT]\n\nelse if [SALES_TYPE] = "Adjusted BL Migration"\nthen [UNIT]\n\nelse 0),\n #"Added Custom1" = Table.AddColumn(#"Added Custom", "SME Units in $ (*$361)", each if [DEAL_TYPE] = "SME Unit" \nand [SALES_TYPE] <> "4 - Renewal"\n then [UNIT] * 361\nelse 0),\n #"Added Custom2" = Table.AddColumn(#"Added Custom1", "Banklink in $ (*$148)", each [Banklink Units] * 148)\nin\n #"Added Custom2"', } ], "datasourceUsages": [ @@ -223,7 +223,6 @@ def register_mock_api(request_mock): } ], }, - ], }, { @@ -256,7 +255,6 @@ def register_mock_api(request_mock): } ], }, - ], }, ], @@ -460,20 +458,13 @@ def test_extract_lineage(mock_msal, pytestconfig, tmp_path, mock_time, requests_ **default_source_config(), "extract_lineage": True, "dataset_type_mapping": { - "PostgreSql": { - "platform_instance": "operational_instance" - }, + "PostgreSql": {"platform_instance": "operational_instance"}, "Oracle": { "platform_instance": "high_performance_production_unit" }, - "Sql": { - "platform_instance": "reporting-db" - }, - "Snowflake": { - "platform_instance": "sn-2" - }, + "Sql": 
{"platform_instance": "reporting-db"}, + "Snowflake": {"platform_instance": "sn-2"}, }, - }, }, "sink": { @@ -493,4 +484,4 @@ def test_extract_lineage(mock_msal, pytestconfig, tmp_path, mock_time, requests_ pytestconfig, output_path=tmp_path / "powerbi_lineage_mces.json", golden_path=f"{test_resources_dir}/{golden_file}", - ) \ No newline at end of file + ) From 955245cbdd522c737d6eae3f90a5eeb093511c3d Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Tue, 20 Dec 2022 16:28:24 +0530 Subject: [PATCH 35/53] lint fix --- .../ingestion/source/powerbi/config.py | 6 ++--- .../ingestion/source/powerbi/powerbi.py | 4 +-- .../datahub/ingestion/source/powerbi/proxy.py | 26 +++++++++++++++++++ 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index a652bb42afbe6..448c14700bcb5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -1,17 +1,15 @@ import logging - from dataclasses import dataclass, field as dataclass_field from typing import Dict, List, Union import pydantic from pydantic import validator +from pydantic.class_validators import root_validator import datahub.emitter.mce_builder as builder +from datahub.configuration.common import AllowDenyPattern from datahub.configuration.source_common import DEFAULT_ENV, EnvBasedSourceConfigBase from datahub.ingestion.api.source import SourceReport -from pydantic.class_validators import root_validator - -from datahub.configuration.common import AllowDenyPattern LOGGER = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index e4d457df16ac5..373591ee7e09d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -764,9 +764,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: # Fetch PowerBi workspace for given workspace identifier for workspace_id in self.get_workspace_ids(): LOGGER.info(f"Scanning workspace id: {workspace_id}") - workspace = self.powerbi_client.get_workspace( - workspace_id, self.reporter - ) + workspace = self.powerbi_client.get_workspace(workspace_id, self.reporter) for dashboard in workspace.dashboards: diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py index 1b644a4fb4265..a7e027551290a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py @@ -684,6 +684,32 @@ def get_reports( return reports + def get_groups(self): + group_endpoint = PowerBiAPI.BASE_URL + # Hit PowerBi + LOGGER.info(f"Request to get groups endpoint URL={group_endpoint}") + response = requests.get( + group_endpoint, + headers={Constant.Authorization: self.get_access_token()}, + ) + response.raise_for_status() + return response.json() + + def get_workspaces(self): + groups = self.get_groups() + workspaces = [ + PowerBiAPI.Workspace( + id=workspace.get("id"), + name=workspace.get("name"), + state="", + datasets={}, + dashboards=[], + ) + for workspace in groups.get("value", []) + if workspace.get("type", None) == "Workspace" + ] + return workspaces + # flake8: noqa: C901 def get_workspace( self, 
workspace_id: str, reporter: PowerBiDashboardSourceReport From b53de60710b649c5e9f63d5e373035f6f602a08c Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Tue, 20 Dec 2022 17:28:12 +0530 Subject: [PATCH 36/53] fix golden files --- .../golden_test_disabled_ownership.json | 2 +- .../powerbi/golden_test_ingest.json | 4 +- .../powerbi/golden_test_lineage.json | 2 +- .../powerbi/golden_test_report.json | 2 +- .../golden_test_scan_all_workspaces.json | 331 +++++++++++++----- .../tests/integration/powerbi/test_powerbi.py | 42 ++- 6 files changed, 291 insertions(+), 92 deletions(-) diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json index f913484fb85f9..528477ca3d945 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json @@ -299,7 +299,7 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", + "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"64ED5CAD-7C10-4684-8180-826122881108\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json index c89ba31b30a2f..4646baa3ad141 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json @@ -383,7 +383,7 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", + "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": 
\"64ED5CAD-7C10-4684-8180-826122881108\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", "contentType": "application/json" }, "systemMetadata": { @@ -433,4 +433,4 @@ "runId": "powerbi-test" } } -] \ No newline at end of file +] diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json index 85fce7f7d4394..d59d38b7d17a9 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json @@ -467,7 +467,7 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", + "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"64ED5CAD-7C10-4684-8180-826122881108\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json index 43707cec35e2e..9092d5bc6ea7f 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json @@ -383,7 +383,7 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", + "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", 
\"workspaceId\": \"64ED5CAD-7C10-4684-8180-826122881108\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json b/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json index afa2b182168d1..255a907e39b8f 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json @@ -1,28 +1,49 @@ [ { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"description\": \"issue_history\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"name\": \"public issue_history\", \"description\": \"public issue_history\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"SNOWFLAKE_TESTTABLE\", \"description\": \"SNOWFLAKE_TESTTABLE\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" } }, { - "auditHeader": null, "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)", - "entityKeyAspect": null, + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -31,36 +52,166 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query\", \"description\": \"snowflake native-query\", \"tags\": []}", + "contentType": 
"application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"job-history\", \"description\": \"job-history\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"postgres_test_table\", \"description\": \"postgres_test_table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"dbo_book_issue\", \"description\": \"dbo_book_issue\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"ms_sql_native_table\", \"description\": \"ms_sql_native_table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + 
"aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" } }, { - "auditHeader": null, "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:postgres,library_db.public.issue_history,DEV)\"}]}", + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "powerbi-test" } }, { - "auditHeader": null, "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -69,17 +220,12 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "powerbi-test" } }, { - "auditHeader": null, "entityType": "chart", "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "chartKey", "aspect": { @@ -88,17 +234,54 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "value": "{\"customProperties\": {\"datasetId\": \"ba0130a1-5b03-40de-9535-b34e778ea6ed\", \"reportId\": \"\", \"datasetWebUrl\": 
\"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"yearly_sales\", \"description\": \"yearly_sales\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"chartId\": \"powerbi.linkedin.com/charts/23212598-23b5-4980-87cc-5fc0ecd84385\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" } }, { - "auditHeader": null, "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "browsePaths", "aspect": { @@ -107,36 +290,26 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "powerbi-test" } }, { - "auditHeader": null, "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"chartCount\": \"1\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", + "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"64ED5CAD-7C10-4684-8180-826122881108\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", "contentType": "application/json" }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": 
"powerbi-test" } }, { - "auditHeader": null, "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -145,17 +318,12 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "powerbi-test" } }, { - "auditHeader": null, "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", - "entityKeyAspect": null, "changeType": "UPSERT", "aspectName": "dashboardKey", "aspect": { @@ -164,63 +332,60 @@ }, "systemMetadata": { "lastObserved": 1643871600000, - "runId": "powerbi-test", - "registryName": null, - "registryVersion": null, - "properties": null + "runId": "powerbi-test" } }, { - "aspect": { - "contentType": "application/json", - "value": "{\"paths\": [\"/powerbi/second-demo-workspace\"]}" - }, - "aspectName": "browsePaths", - "changeType": "UPSERT", "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "value": "{\"paths\": [\"/powerbi/second-demo-workspace\"]}", + "contentType": "application/json" + }, "systemMetadata": { "lastObserved": 1643871600000, "runId": "powerbi-test" } }, { - "aspect": { - "contentType": "application/json", - "value": "{\"customProperties\": {\"chartCount\": \"0\", \"workspaceName\": \"second-demo-workspace\", \"workspaceId\": \"7D668CAD-8FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard2\", \"description\": \"test_dashboard2\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}" - }, - "aspectName": "dashboardInfo", - "changeType": "UPSERT", "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "value": "{\"customProperties\": {\"chartCount\": \"0\", \"workspaceName\": \"second-demo-workspace\", \"workspaceId\": \"64ED5CAD-7C22-4684-8180-826122881108\"}, \"title\": \"test_dashboard2\", \"description\": \"test_dashboard2\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", + "contentType": "application/json" + }, "systemMetadata": { "lastObserved": 1643871600000, "runId": "powerbi-test" } }, { - "aspect": { - "contentType": "application/json", - "value": "{\"removed\": false}" - }, - "aspectName": "status", - "changeType": "UPSERT", "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, "systemMetadata": { "lastObserved": 1643871600000, "runId": "powerbi-test" } }, { - "aspect": { - "contentType": "application/json", - "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE\"}" - }, - "aspectName": 
"dashboardKey", - "changeType": "UPSERT", "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE\"}", + "contentType": "application/json" + }, "systemMetadata": { "lastObserved": 1643871600000, "runId": "powerbi-test" diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 4105fdceb37ee..7815b369022c0 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -134,6 +134,31 @@ def register_mock_api(request_mock): ] }, }, + "https://api.powerbi.com/v1.0/myorg/admin/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE/users": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "identifier": "User3@foo.com", + "displayName": "user3", + "emailAddress": "User3@foo.com", + "datasetUserAccessRight": "ReadWrite", + "graphId": "C9EE53F2-88EA-4711-A173-AF0515A3CD46", + "principalType": "User", + }, + { + "identifier": "User4@foo.com", + "displayName": "user4", + "emailAddress": "User4@foo.com", + "datasetUserAccessRight": "ReadWrite", + "graphId": "C9EE53F2-88EA-4711-A173-AF0515A5REWS", + "principalType": "User", + }, + ] + }, + + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/tiles": { "method": "GET", "status_code": 200, @@ -168,6 +193,15 @@ def register_mock_api(request_mock): "webUrl": "http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445", }, }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445": { + "method": "GET", + "status_code": 200, + "json": { + "id": "05169CD2-E713-41E6-96AA-1D8066D95445", + "name": "library-dataset", + "webUrl": "http://localhost/groups/64ED5CAD-7C22-4684-8180-826122881108/datasets/05169CD2-E713-41E6-96AA-1D8066D95445", + }, + }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed": { "method": "GET", "status_code": 200, @@ -466,12 +500,12 @@ def test_powerbi_ingest(mock_msal, pytestconfig, tmp_path, mock_time, requests_m pipeline.run() pipeline.raise_from_status() - mce_out_file = "golden_test_ingest.json" + golden_file = "golden_test_ingest.json" mce_helpers.check_golden_file( pytestconfig, output_path=tmp_path / "powerbi_mces.json", - golden_path=f"{test_resources_dir}/{mce_out_file}", + golden_path=f"{test_resources_dir}/{golden_file}", ) @@ -555,12 +589,12 @@ def test_scan_all_workspaces( pipeline.run() pipeline.raise_from_status() - mce_out_file = "golden_test_scan_all_workspaces.json" + golden_file = "golden_test_scan_all_workspaces.json" mce_helpers.check_golden_file( pytestconfig, output_path=tmp_path / "powerbi_mces_scan_all_workspaces.json", - golden_path=f"{test_resources_dir}/{mce_out_file}", + golden_path=f"{test_resources_dir}/{golden_file}", ) From 3ca31a0ae2d6c6ce1e37183f77210c64417e65ab Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Tue, 20 Dec 2022 18:22:42 +0530 Subject: [PATCH 37/53] fix test --- .../tests/integration/powerbi/test_powerbi.py | 30 ++++++++----------- 1 file changed, 13 insertions(+), 17 
deletions(-) diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 7815b369022c0..fcd68e472675a 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -1,3 +1,4 @@ +from typing import Dict, Any from unittest import mock from freezegun import freeze_time @@ -7,8 +8,6 @@ FROZEN_TIME = "2022-02-03 07:00:00" -call_number = 1 - def mock_msal_cca(*args, **kwargs): class MsalClient: @@ -20,12 +19,16 @@ def acquire_token_for_client(self, *args, **kwargs): return MsalClient() -def scan_init_response(_request, _context): - global call_number - if call_number == 1: - call_number += 1 - return {"id": "4674efd1-603c-4129-8d82-03cf2be05aff"} - return {"id": "a674efd1-603c-4129-8d82-03cf2be05aff"} +def scan_init_response(request, context): + # Request mock is passing POST input in the form of workspaces= + workspace_id = request.text.split("=")[1] + + w_id_vs_response: Dict[str, Any] = { + "64ED5CAD-7C10-4684-8180-826122881108": {"id": "4674efd1-603c-4129-8d82-03cf2be05aff"}, + "64ED5CAD-7C22-4684-8180-826122881108": {"id": "a674efd1-603c-4129-8d82-03cf2be05aff"}, + } + + return w_id_vs_response[workspace_id] def register_mock_api(request_mock): @@ -473,8 +476,6 @@ def default_source_config(): @freeze_time(FROZEN_TIME) @mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) def test_powerbi_ingest(mock_msal, pytestconfig, tmp_path, mock_time, requests_mock): - global call_number - call_number = 1 test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" @@ -514,8 +515,6 @@ def test_powerbi_ingest(mock_msal, pytestconfig, tmp_path, mock_time, requests_m def test_override_ownership( mock_msal, pytestconfig, tmp_path, mock_time, requests_mock ): - global call_number - call_number = 1 test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" @@ -556,8 +555,6 @@ def test_override_ownership( def test_scan_all_workspaces( mock_msal, pytestconfig, tmp_path, mock_time, requests_mock ): - global call_number - call_number = 1 test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" @@ -601,8 +598,6 @@ def test_scan_all_workspaces( @freeze_time(FROZEN_TIME) @mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) def test_extract_reports(mock_msal, pytestconfig, tmp_path, mock_time, requests_mock): - global call_number - call_number = 1 test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" @@ -641,6 +636,7 @@ def test_extract_reports(mock_msal, pytestconfig, tmp_path, mock_time, requests_ @freeze_time(FROZEN_TIME) @mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) def test_extract_lineage(mock_msal, pytestconfig, tmp_path, mock_time, requests_mock): + test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" register_mock_api(request_mock=requests_mock) @@ -678,6 +674,6 @@ def test_extract_lineage(mock_msal, pytestconfig, tmp_path, mock_time, requests_ mce_helpers.check_golden_file( pytestconfig, - output_path=tmp_path / "powerbi_lineage_mces.json", + output_path=f"{tmp_path}/powerbi_lineage_mces.json", golden_path=f"{test_resources_dir}/{golden_file}", ) From 68363ff8ff73a84dc115499ad5908728de33b8d3 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Tue, 20 Dec 2022 18:39:02 +0530 Subject: [PATCH 38/53] lint fix --- .../tests/integration/powerbi/test_powerbi.py | 11 +++++++---- 1 file 
changed, 7 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index fcd68e472675a..ce934ffc0a688 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -1,4 +1,4 @@ -from typing import Dict, Any +from typing import Any, Dict from unittest import mock from freezegun import freeze_time @@ -24,8 +24,12 @@ def scan_init_response(request, context): workspace_id = request.text.split("=")[1] w_id_vs_response: Dict[str, Any] = { - "64ED5CAD-7C10-4684-8180-826122881108": {"id": "4674efd1-603c-4129-8d82-03cf2be05aff"}, - "64ED5CAD-7C22-4684-8180-826122881108": {"id": "a674efd1-603c-4129-8d82-03cf2be05aff"}, + "64ED5CAD-7C10-4684-8180-826122881108": { + "id": "4674efd1-603c-4129-8d82-03cf2be05aff" + }, + "64ED5CAD-7C22-4684-8180-826122881108": { + "id": "a674efd1-603c-4129-8d82-03cf2be05aff" + }, } return w_id_vs_response[workspace_id] @@ -160,7 +164,6 @@ def register_mock_api(request_mock): }, ] }, - }, "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE/tiles": { "method": "GET", From 3abe48fb4107ff74a21ceaf2076962ff7c097742 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 21 Dec 2022 11:39:01 +0530 Subject: [PATCH 39/53] lint fix --- .../docs/sources/powerbi/powerbi_pre.md | 48 +- .../ingestion/source/powerbi/config.py | 13 +- .../ingestion/source/powerbi/powerbi.py | 23 +- .../golden_test_lower_case_urn_ingest.json | 436 ++++++++++++++++++ .../tests/integration/powerbi/test_powerbi.py | 42 ++ 5 files changed, 555 insertions(+), 7 deletions(-) create mode 100644 metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json diff --git a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md index c87435a077968..24f7b92cf8998 100644 --- a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md +++ b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md @@ -7,10 +7,10 @@ See the - Enhance admin APIs responses with detailed metadata ## Concept mapping -| Power BI | Datahub | +| Power BI | Datahub | |-----------------------|---------------------| | `Dashboard` | `Dashboard` | -| `Dataset, Datasource` | `Dataset` | +| `Dataset's Table` | `Dataset` | | `Tile` | `Chart` | | `Report.webUrl` | `Chart.externalUrl` | | `Workspace` | `N/A` | @@ -18,3 +18,47 @@ See the | `Page` | `Chart` | If Tile is created from report then Chart.externalUrl is set to Report.webUrl. + +## Lineage +You can control table lineage ingestion using `extract_lineage` configuration parameter, by default it is set to `true`. + +PowerBI Source extracts the lineage information by parsing PowerBI M-Query expression. + +PowerBI Source supports M-Query expression for below listed PowerBI Data Sources + +1. Snowflake +2. Oracle +3. PostgreSQL +4. MS-SQL + +Native SQL query parsing is only supported for `Snowflake` data-source and only first table from `FROM` clause will be ingested as upstream table. Advance SQL construct like JOIN and SUB-QUERIES in `FROM` clause are not supported. + +For example refer below native SQL query. The table `OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_UNIT_TARGET` will be ingested as upstream table. 
+ +```shell +let + Source = Value.NativeQuery( + Snowflake.Databases( + "sdfsd788.ws-east-2.fakecomputing.com", + "operations_analytics_prod", + [Role = "OPERATIONS_ANALYTICS_MEMBER"] + ){[Name = "OPERATIONS_ANALYTICS"]}[Data], + "select #(lf)UPPER(REPLACE(AGENT_NAME,\'-\',\'\')) AS Agent,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,\'-\',\'\'))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_UNIT_TARGETS#(lf)#(lf)where YEAR_TARGET >= 2020#(lf)and TEAM_TYPE = \'foo\'#(lf)and TARGET_TEAM = \'bar\'", + null, + [EnableFolding = true] + ), + #"Added Conditional Column" = Table.AddColumn( + Source, + "Has PS Software Quota?", + each + if [TIER] = "Expansion (Medium)" then + "Yes" + else if [TIER] = "Acquisition" then + "Yes" + else + "No" + ) +in + #"Added Conditional Column" +``` + diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index 448c14700bcb5..1d820c726544f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -90,7 +90,7 @@ def report_charts_dropped(self, view: str) -> None: class PlatformDetail: platform_instance: str = pydantic.Field( default=None, - description="DataHub platform instance name. It should be same as you have used in ingestion receipe of DataHub platform ingestion source", + description="DataHub platform instance name. It should be same as you have used in ingestion receipe of DataHub platform ingestion source of particular platform", ) env: str = pydantic.Field( default=DEFAULT_ENV, @@ -143,6 +143,17 @@ class PowerBiAPIConfig(EnvBasedSourceConfigBase): description="Whether PowerBI native query should be parsed to extract lineage", ) + # convert PowerBI data-set URN to lower-case + convert_urns_to_lowercase: bool = pydantic.Field( + default=False, + description="Whether to convert the PowerBI assets urns to lowercase", + ) + # convert lineage dataset's urns to lowercase + convert_lineage_urns_to_lowercase: bool = pydantic.Field( + default=True, + description="Whether to convert the urns of ingested lineage dataset to lowercase", + ) + @validator("dataset_type_mapping") @classmethod def map_data_platform(cls, value): diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 373591ee7e09d..61119a8658d58 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -57,7 +57,7 @@ class Mapper: """ - Transfrom PowerBi concepts Dashboard, Dataset and Tile to DataHub concepts Dashboard, Dataset and Chart + Transform PowerBi concepts Dashboard, Dataset and Tile to DataHub concepts Dashboard, Dataset and Chart """ class EquableMetadataWorkUnit(MetadataWorkUnit): @@ -80,6 +80,21 @@ def __init__( self.__config = config self.__reporter = reporter + @staticmethod + def urn_to_lowercase(value: str, flag: bool) -> str: + if flag is True: + return value.lower() + + return value + + def lineage_urn_to_lowercase(self, value): + return Mapper.urn_to_lowercase( + value, self.__config.convert_lineage_urns_to_lowercase + ) + + def 
assets_urn_to_lowercase(self, value): + return Mapper.urn_to_lowercase(value, self.__config.convert_urns_to_lowercase) + def new_mcp( self, entity_type, @@ -131,7 +146,7 @@ def __to_datahub_dataset( # Create a URN for dataset ds_urn = builder.make_dataset_urn( platform=self.__config.platform_name, - name=f"{table.full_name}", + name=self.assets_urn_to_lowercase(table.full_name), env=self.__config.env, ) @@ -192,7 +207,7 @@ def __to_datahub_dataset( platform=platform_name, platform_instance=platform_instance_name, env=platform_env, - name=upstream_table.full_name, + name=self.lineage_urn_to_lowercase(upstream_table.full_name), ) upstream_table_class = UpstreamClass( upstream_urn, @@ -219,7 +234,7 @@ def __to_datahub_chart( Map PowerBi tile to datahub chart """ LOGGER.info("Converting tile {}(id={}) to chart".format(tile.title, tile.id)) - # Create an URN for chart + # Create a URN for chart chart_urn = builder.make_chart_urn( self.__config.platform_name, tile.get_urn_part() ) diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json new file mode 100644 index 0000000000000..2eabb5dcc45f1 --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json @@ -0,0 +1,436 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"public issue_history\", \"description\": \"public issue_history\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"SNOWFLAKE_TESTTABLE\", \"description\": \"SNOWFLAKE_TESTTABLE\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query\", \"description\": \"snowflake native-query\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"job-history\", \"description\": \"job-history\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"postgres_test_table\", \"description\": \"postgres_test_table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"dbo_book_issue\", \"description\": \"dbo_book_issue\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"ms_sql_native_table\", \"description\": \"ms_sql_native_table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + 
"lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "value": "{\"active\": true, \"displayName\": \"user1\", \"email\": \"User1@foo.com\", \"title\": \"user1\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User1@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "value": "{\"username\": \"User1@foo.com\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "value": "{\"active\": true, \"displayName\": \"user2\", \"email\": \"User2@foo.com\", \"title\": \"user2\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:users.User2@foo.com", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "value": "{\"username\": \"User2@foo.com\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": 
"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"chartId\": \"powerbi.linkedin.com/charts/B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "value": "{\"customProperties\": {\"datasetId\": \"ba0130a1-5b03-40de-9535-b34e778ea6ed\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/ba0130a1-5b03-40de-9535-b34e778ea6ed/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"yearly_sales\", \"description\": \"yearly_sales\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.ms_sql_native_table,DEV)\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)", + "changeType": "UPSERT", + "aspectName": "chartKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"chartId\": \"powerbi.linkedin.com/charts/23212598-23b5-4980-87cc-5fc0ecd84385\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "value": "{\"paths\": [\"/powerbi/demo-workspace\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "value": "{\"customProperties\": {\"chartCount\": \"2\", \"workspaceName\": \"demo-workspace\", \"workspaceId\": \"64ED5CAD-7C10-4684-8180-826122881108\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\", \"urn:li:chart:(powerbi,charts.23212598-23b5-4980-87cc-5fc0ecd84385)\"], \"datasets\": [], 
\"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-7FFC-4505-9215-655BCA5BEBAE)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:users.User1@foo.com\", \"type\": \"NONE\"}, {\"owner\": \"urn:li:corpuser:users.User2@foo.com\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index ce934ffc0a688..d6ae1b033b10c 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -467,6 +467,7 @@ def default_source_config(): "workspace_id": "64ED5CAD-7C10-4684-8180-826122881108", "extract_lineage": False, "extract_reports": False, + "convert_lineage_urns_to_lowercase": False, "workspace_id_pattern": {"allow": ["64ED5CAD-7C10-4684-8180-826122881108"]}, "dataset_type_mapping": { "PostgreSql": "postgres", @@ -513,6 +514,47 @@ def test_powerbi_ingest(mock_msal, pytestconfig, tmp_path, mock_time, requests_m ) +@freeze_time(FROZEN_TIME) +@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) +def test_powerbi_ingest_urn_lower_case( + mock_msal, pytestconfig, tmp_path, mock_time, requests_mock +): + + test_resources_dir = pytestconfig.rootpath / "tests/integration/powerbi" + + register_mock_api(request_mock=requests_mock) + + pipeline = Pipeline.create( + { + "run_id": "powerbi-test", + "source": { + "type": "powerbi", + "config": { + **default_source_config(), + "convert_urns_to_lowercase": True, + "convert_lineage_urns_to_lowercase": True, + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/powerbi_lower_case_urn_mces.json", + }, + }, + } + ) + + pipeline.run() + pipeline.raise_from_status() + golden_file = "golden_test_lower_case_urn_ingest.json" + + mce_helpers.check_golden_file( + pytestconfig, + output_path=f"{tmp_path}/powerbi_lower_case_urn_mces.json", + golden_path=f"{test_resources_dir}/{golden_file}", + ) + + @freeze_time(FROZEN_TIME) 
@mock.patch("msal.ConfidentialClientApplication", side_effect=mock_msal_cca) def test_override_ownership( From 843cf0d4dabb97050fad1f00cd4e5f7d9daab036 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 21 Dec 2022 11:44:26 +0530 Subject: [PATCH 40/53] spell fix --- .../src/datahub/ingestion/source/powerbi/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index 1d820c726544f..556c80ba578b2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -143,7 +143,7 @@ class PowerBiAPIConfig(EnvBasedSourceConfigBase): description="Whether PowerBI native query should be parsed to extract lineage", ) - # convert PowerBI data-set URN to lower-case + # convert PowerBI dataset URN to lower-case convert_urns_to_lowercase: bool = pydantic.Field( default=False, description="Whether to convert the PowerBI assets urns to lowercase", From dfe51a0aede1afea0d4ee58e5ace52eaae15aaad Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 26 Dec 2022 21:26:09 +0530 Subject: [PATCH 41/53] 1. Lint fix 2. Multiple data-source support 3. Table.Combine --- .../source/powerbi/m_query/data_classes.py | 43 ++ .../source/powerbi/m_query/parser.py | 1 + .../source/powerbi/m_query/resolver.py | 458 +++++++++++------- .../source/powerbi/m_query/tree_function.py | 1 + .../powerbi/powerbi-lexical-grammar.rule | 2 +- .../integration/powerbi/test_m_parser.py | 56 ++- 6 files changed, 379 insertions(+), 182 deletions(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/data_classes.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/data_classes.py new file mode 100644 index 0000000000000..6f845a32b7007 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/data_classes.py @@ -0,0 +1,43 @@ +from abc import ABC +from dataclasses import dataclass +from typing import Any, Dict, Optional + +from lark import Tree + + +class AbstractIdentifierAccessor(ABC): # To pass lint + pass + + +# @dataclass +# class ItemSelector: +# items: Dict[str, Any] +# next: Optional[AbstractIdentifierAccessor] + + +@dataclass +class IdentifierAccessor(AbstractIdentifierAccessor): + """ + statement + public_order_date = Source{[Schema="public",Item="order_date"]}[Data] + will be converted to IdentifierAccessor instance + where: + + "Source" is identifier + + "[Schema="public",Item="order_date"]" is "items" in ItemSelector. Data of items varies as per DataSource + + "public_order_date" is in "next" of ItemSelector. The "next" will be None if this identifier is leaf i.e. 
table + + """ + + identifier: str + items: Dict[str, Any] + next: Optional[AbstractIdentifierAccessor] + + +@dataclass +class DataAccessFunctionDetail: + arg_list: Tree + data_access_function_name: str + identifier_accessor: Optional[IdentifierAccessor] diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py index 1731fa250e0dd..35af1fb89f3b2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py @@ -43,6 +43,7 @@ def get_upstream_tables( try: parse_tree: Tree = _parse_expression(table.expression) + print(parse_tree.pretty()) valid, message = validator.validate_parse_tree( parse_tree, native_query_enabled=native_query_enabled ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index a04ff735b9860..592e937e1257c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -2,12 +2,16 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from enum import Enum -from typing import Any, Dict, List, Optional, Tuple, Type, cast +from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast from lark import Tree from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport from datahub.ingestion.source.powerbi.m_query import native_sql_parser, tree_function +from datahub.ingestion.source.powerbi.m_query.data_classes import ( + DataAccessFunctionDetail, + IdentifierAccessor, +) from datahub.ingestion.source.powerbi.proxy import PowerBiAPI LOGGER = logging.getLogger(__name__) @@ -46,7 +50,9 @@ class SupportedDataPlatform(Enum): class AbstractTableFullNameCreator(ABC): @abstractmethod - def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: + def get_full_table_names( + self, data_access_func_detail: DataAccessFunctionDetail + ) -> List[str]: pass @abstractmethod @@ -58,6 +64,7 @@ class AbstractDataAccessMQueryResolver(ABC): table: PowerBiAPI.Table parse_tree: Tree reporter: PowerBiDashboardSourceReport + data_access_functions: List[str] def __init__( self, @@ -68,6 +75,7 @@ def __init__( self.table = table self.parse_tree = parse_tree self.reporter = reporter + self.data_access_functions = SupportedResolver.get_function_names() @abstractmethod def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: @@ -106,21 +114,16 @@ def get_item_selector_tokens( identifier: List[str] = tree_function.token_values( cast(Tree, identifier_tree) ) # type :ignore + # convert tokens to dict iterator = iter(tokens) - # cast to satisfy lint - return identifier[0], dict(zip(iterator, iterator)) - def get_argument_list(self, variable_statement: Tree) -> Optional[Tree]: - expression_tree: Optional[Tree] = tree_function.first_expression_func( - variable_statement - ) - if expression_tree is None: - LOGGER.debug("First expression rule not found in input tree") - return None + return "".join(identifier), dict(zip(iterator, iterator)) + @staticmethod + def get_argument_list(invoke_expression: Tree) -> Optional[Tree]: argument_list: Optional[Tree] = tree_function.first_arg_list_func( - expression_tree + invoke_expression ) if argument_list is None: LOGGER.debug("First argument-list rule not found in input 
tree") @@ -128,13 +131,136 @@ def get_argument_list(self, variable_statement: Tree) -> Optional[Tree]: return argument_list - def make_token_dict(self, identifier: str) -> Dict[str, Any]: - token_dict: Dict[str, Any] = {} + def _process_invoke_expression( + self, invoke_expression: Tree + ) -> Union[DataAccessFunctionDetail, List[str], None]: + + letter_tree: Tree = invoke_expression.children[0] + data_access_func: str = tree_function.make_function_name(letter_tree) + # The invoke function is either DataAccess function like PostgreSQL.Database() or + # some other function like Table.AddColumn or Table.Combine and so on + if data_access_func in self.data_access_functions: + arg_list: Optional[Tree] = MQueryResolver.get_argument_list( + invoke_expression + ) + if arg_list is None: + self.reporter.report_warning( + f"{self.table.full_name}-arg-list", + f"Argument list not found for data-access-function {data_access_func}", + ) + return None + + return DataAccessFunctionDetail( + arg_list=arg_list, + data_access_function_name=data_access_func, + identifier_accessor=None, + ) + + # function is not data-access function, lets process function argument + first_arg_tree: Optional[Tree] = tree_function.first_arg_list_func( + invoke_expression + ) + + if first_arg_tree is None: + LOGGER.debug( + "Function invocation without argument in expression = %s", + invoke_expression.pretty(), + ) + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "Function invocation without argument", + ) + return None + + first_argument: Tree = tree_function.flat_argument_list(first_arg_tree)[ + 0 + ] # take first argument only + expression: Optional[Tree] = tree_function.first_list_expression_func( + first_argument + ) + + LOGGER.debug("Extracting token from tree %s", first_argument.pretty()) + if expression is None: + expression = tree_function.first_type_expression_func(first_argument) + if expression is None: + LOGGER.debug( + "Either list_expression or type_expression is not found = %s", + invoke_expression.pretty(), + ) + self.reporter.report_warning( + f"{self.table.full_name}-variable-statement", + "Function argument expression is not supported", + ) + return None + + tokens: List[str] = tree_function.remove_whitespaces_from_list( + tree_function.token_values(expression) + ) + + LOGGER.debug("Tokens in invoke expression are %s", tokens) + return tokens + + def _process_item_selector_expression( + self, rh_tree: Tree + ) -> Tuple[Optional[str], Optional[Dict[str, str]]]: + new_identifier, key_vs_value = self.get_item_selector_tokens( # type: ignore + cast(Tree, tree_function.first_expression_func(rh_tree)) + ) + + return new_identifier, key_vs_value + + @staticmethod + def _create_or_update_identifier_accessor( + identifier_accessor: Optional[IdentifierAccessor], + new_identifier: str, + key_vs_value: Dict[str, Any], + ) -> IdentifierAccessor: + + # def create_item_selector(items: Dict[str, Any], _next: IdentifierAccessor): + # return ItemSelector( + # items=items, + # next=_next, + # ) + # + # def update_identifier_accessor(node: IdentifierAccessor, identifier: str, items: Dict[str, Any]) -> bool: + # flag: bool = False + # if node.identifier == identifier: + # node.item_selectors.append( + # create_item_selector( + # items=items + # ) + # ) + # return True + # + # for item_selector in node.item_selectors: + # if item_selector.next is None: + # continue + # flag = update_identifier_accessor(item_selector.next, identifier, items) + # if flag is True: + # break + # + # return flag + + # 
It is first identifier_accessor + if identifier_accessor is None: + return IdentifierAccessor( + identifier=new_identifier, items=key_vs_value, next=None + ) + + new_identifier_accessor: IdentifierAccessor = IdentifierAccessor( + identifier=new_identifier, items=key_vs_value, next=identifier_accessor + ) + + return new_identifier_accessor - def fill_token_dict( - identifier: str, - supported_data_access_func: List[str], - t_dict: Dict[str, Any], + def create_data_access_functional_detail( + self, identifier: str + ) -> List[DataAccessFunctionDetail]: + table_links: List[DataAccessFunctionDetail] = [] + + def internal( + current_identifier: str, + identifier_accessor: Optional[IdentifierAccessor], ) -> None: """ 1) Find statement where identifier appear in the left-hand side i.e. identifier = expression @@ -146,105 +272,74 @@ def fill_token_dict( 5) This recursion will continue till we reach to data-access function and during recursion we will fill token_dict dictionary for all item_selector we find during traversal. - :param identifier: variable to look for - :param supported_data_access_func: List of supported data-access functions - :param t_dict: dict where key is identifier and value is key-value pair which represent item selected from - identifier + :param current_identifier: variable to look for + :param identifier_accessor: :return: None """ + # Grammar of variable_statement is = + # Examples: Source = PostgreSql.Database() + # public_order_date = Source{[Schema="public",Item="order_date"]}[Data] v_statement: Optional[Tree] = tree_function.get_variable_statement( - self.parse_tree, identifier + self.parse_tree, current_identifier ) if v_statement is None: self.reporter.report_warning( f"{self.table.full_name}-variable-statement", - f"output variable ({identifier}) statement not found in table expression", + f"output variable ({current_identifier}) statement not found in table expression", ) return None - expression_tree: Optional[Tree] = tree_function.first_expression_func( - v_statement - ) - if expression_tree is None: + # Any expression after "=" sign of variable-statement + rh_tree: Optional[Tree] = tree_function.first_expression_func(v_statement) + if rh_tree is None: LOGGER.debug("Expression tree not found") LOGGER.debug(v_statement.pretty()) return None + invoke_expression: Optional[ Tree - ] = tree_function.first_invoke_expression_func(expression_tree) - if invoke_expression is not None: - letter_tree: Tree = invoke_expression.children[0] - data_access_func: str = tree_function.make_function_name(letter_tree) - if data_access_func in supported_data_access_func: - token_dict.update( - { - f"{data_access_func}": { - "arg_list": self.get_argument_list(expression_tree), - **t_dict, - } - } - ) - return + ] = tree_function.first_invoke_expression_func(rh_tree) - first_arg_tree: Optional[Tree] = tree_function.first_arg_list_func( - invoke_expression - ) - if first_arg_tree is None: - LOGGER.debug( - "Function invocation without argument in expression = %s", - invoke_expression.pretty(), - ) - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "Function invocation without argument", - ) - return None - type_expression: Optional[ - Tree - ] = tree_function.first_type_expression_func(first_arg_tree) - if type_expression is None: - LOGGER.debug( - "Type expression not found in expression = %s", - first_arg_tree.pretty(), - ) - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "Type expression not found", + if invoke_expression 
is not None: + result: Union[ + DataAccessFunctionDetail, List[str], None + ] = self._process_invoke_expression(invoke_expression) + if result is None: + return None # No need to process some un-expected grammar found while processing invoke_expression + if isinstance(result, DataAccessFunctionDetail): + cast( + DataAccessFunctionDetail, result + ).identifier_accessor = identifier_accessor + table_links.append(result) # Link of a table is completed + identifier_accessor = ( + None # reset the identifier_accessor for other table ) return None + # Process first argument of the function. + # The first argument can be a single table argument or list of table. + # For example Table.Combine({t1,t2},....), here first argument is list of table. + # Table.AddColumn(t1,....), here first argument is single table. + for token in cast(List[str], result): + internal(token, identifier_accessor) - tokens: List[str] = tree_function.token_values(type_expression) - if len(tokens) != 1: - LOGGER.debug( - "type-expression has more than one identifier = %s", - type_expression.pretty(), - ) - self.reporter.report_warning( - f"{self.table.full_name}-variable-statement", - "Unsupported type expression", - ) - return None - new_identifier: str = tokens[0] - fill_token_dict(new_identifier, supported_data_access_func, t_dict) else: - new_identifier, key_vs_value = self.get_item_selector_tokens( # type: ignore - cast(Tree, tree_function.first_expression_func(expression_tree)) + new_identifier, key_vs_value = self._process_item_selector_expression( + rh_tree ) - current_selector: Dict[str, Any] = { - f"{new_identifier}": { - "item_selectors": [ - {"items": key_vs_value, "assigned_to": identifier} - ], - **t_dict, - } - } - fill_token_dict( - new_identifier, supported_data_access_func, current_selector + if new_identifier is None or key_vs_value is None: + LOGGER.debug("Required information not found in rh_tree") + return None + new_identifier_accessor: IdentifierAccessor = ( + self._create_or_update_identifier_accessor( + identifier_accessor, new_identifier, key_vs_value + ) ) - fill_token_dict(identifier, SupportedResolver.get_function_names(), {}) + return internal(new_identifier, new_identifier_accessor) - return token_dict + internal(identifier, None) + + return table_links def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: data_platform_tables: List[DataPlatformTable] = [] @@ -252,6 +347,7 @@ def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: output_variable: Optional[str] = tree_function.get_output_variable( self.parse_tree ) + if output_variable is None: self.reporter.report_warning( f"{self.table.full_name}-output-variable", @@ -259,27 +355,32 @@ def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: ) return data_platform_tables - token_dict: Dict[str, Any] = self.make_token_dict(output_variable) + table_links: List[ + DataAccessFunctionDetail + ] = self.create_data_access_functional_detail(output_variable) - # each key is data-access function - for data_access_func in token_dict.keys(): - supported_resolver = SupportedResolver.get_resolver(data_access_func) + # Each item is data-access function + for f_detail in table_links: + supported_resolver = SupportedResolver.get_resolver( + f_detail.data_access_function_name + ) if supported_resolver is None: LOGGER.debug( "Resolver not found for the data-access-function %s", - data_access_func, + f_detail.data_access_function_name, ) self.reporter.report_warning( 
f"{self.table.full_name}-data-access-function", - f"Resolver not found for data-access-function = {data_access_func}", + f"Resolver not found for data-access-function = {f_detail.data_access_function_name}", ) continue table_full_name_creator: AbstractTableFullNameCreator = ( supported_resolver.get_table_full_name_creator()() ) + for table_full_name in table_full_name_creator.get_full_table_names( - token_dict + f_detail ): data_platform_tables.append( DataPlatformTable( @@ -302,34 +403,34 @@ class DefaultTwoStepDataAccessSources(AbstractTableFullNameCreator, ABC): dbo_book_issue """ - def two_level_access_pattern(self, token_dict: Dict[str, Any]) -> List[str]: + def two_level_access_pattern( + self, data_access_func_detail: DataAccessFunctionDetail + ) -> List[str]: full_table_names: List[str] = [] - LOGGER.debug("Processing PostgreSQL token-dict %s", token_dict) - - for data_access_function in token_dict: - arguments: List[str] = tree_function.strip_char_from_list( - values=tree_function.remove_whitespaces_from_list( - tree_function.token_values( - token_dict[data_access_function]["arg_list"] - ) - ), - char='"', - ) - # delete arg_list as we consumed it and don't want to process it in next step - if len(arguments) != 2: - LOGGER.debug("Expected 2 arguments, but got {%s}", len(arguments)) - return full_table_names + LOGGER.debug( + "Processing PostgreSQL data-access function detail %s", + data_access_func_detail, + ) + arguments: List[str] = tree_function.strip_char_from_list( + values=tree_function.remove_whitespaces_from_list( + tree_function.token_values(data_access_func_detail.arg_list) + ), + char='"', + ) - del token_dict[data_access_function]["arg_list"] + if len(arguments) != 2: + LOGGER.debug("Expected 2 arguments, but got {%s}", len(arguments)) + return full_table_names - db_name: str = arguments[1] - for source in token_dict[data_access_function]: - source_dict: Dict[str, Any] = token_dict[data_access_function][source] - for schema in source_dict["item_selectors"]: - schema_name: str = schema["items"]["Schema"] - table_name: str = schema["items"]["Item"] - full_table_names.append(f"{db_name}.{schema_name}.{table_name}") + db_name: str = arguments[1] + schema_name: str = cast( + IdentifierAccessor, data_access_func_detail.identifier_accessor + ).items["Schema"] + table_name: str = cast( + IdentifierAccessor, data_access_func_detail.identifier_accessor + ).items["Item"] + full_table_names.append(f"{db_name}.{schema_name}.{table_name}") LOGGER.debug("PostgreSQL full-table-names = %s", full_table_names) @@ -337,8 +438,10 @@ def two_level_access_pattern(self, token_dict: Dict[str, Any]) -> List[str]: class PostgresTableFullNameCreator(DefaultTwoStepDataAccessSources): - def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: - return self.two_level_access_pattern(token_dict) + def get_full_table_names( + self, data_access_func_detail: DataAccessFunctionDetail + ) -> List[str]: + return self.two_level_access_pattern(data_access_func_detail) def get_platform_pair(self) -> DataPlatformPair: return SupportedDataPlatform.POSTGRES_SQL.value @@ -348,13 +451,13 @@ class MSSqlTableFullNameCreator(DefaultTwoStepDataAccessSources): def get_platform_pair(self) -> DataPlatformPair: return SupportedDataPlatform.MS_SQL.value - def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: + def get_full_table_names( + self, data_access_func_detail: DataAccessFunctionDetail + ) -> List[str]: full_table_names: List[str] = [] - data_access_dict: Dict[str, Any] = 
list(token_dict.values())[0] - arguments: List[str] = tree_function.strip_char_from_list( values=tree_function.remove_whitespaces_from_list( - tree_function.token_values(data_access_dict["arg_list"]) + tree_function.token_values(data_access_func_detail.arg_list) ), char='"', ) @@ -362,7 +465,7 @@ def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: if len(arguments) == 2: # It is regular case of MS-SQL LOGGER.debug("Handling with regular case") - return self.two_level_access_pattern(token_dict) + return self.two_level_access_pattern(data_access_func_detail) if len(arguments) >= 4 and arguments[2] != "Query": LOGGER.debug("Unsupported case is found. Second index is not the Query") @@ -380,6 +483,7 @@ def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: full_table_names.append( f"{db_name}.{schema_and_table[0]}.{schema_and_table[1]}" ) + LOGGER.debug("MS-SQL full-table-names %s", full_table_names) return full_table_names @@ -400,35 +504,31 @@ def _get_db_name(self, value: str) -> Optional[str]: return db_name - def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: + def get_full_table_names( + self, data_access_func_detail: DataAccessFunctionDetail + ) -> List[str]: full_table_names: List[str] = [] - LOGGER.debug("Processing Oracle token-dict %s", token_dict) + LOGGER.debug( + "Processing Oracle data-access function detail %s", data_access_func_detail + ) - for data_access_function in token_dict: - arguments: List[str] = tree_function.remove_whitespaces_from_list( - tree_function.token_values(token_dict[data_access_function]["arg_list"]) - ) - # delete arg_list as we consumed it and don't want to process it in next step - del token_dict[data_access_function]["arg_list"] - - for source in token_dict[data_access_function]: - source_dict: Dict[str, Any] = token_dict[data_access_function][source] - - db_name: Optional[str] = self._get_db_name(arguments[0]) - if db_name is None: - return full_table_names - - for schema in source_dict["item_selectors"]: - schema_name: str = schema["items"]["Schema"] - for item_selectors in source_dict[schema["assigned_to"]]: - for item_selector in source_dict[schema["assigned_to"]][ - item_selectors - ]: - table_name: str = item_selector["items"]["Name"] - full_table_names.append( - f"{db_name}.{schema_name}.{table_name}" - ) + arguments: List[str] = tree_function.remove_whitespaces_from_list( + tree_function.token_values(data_access_func_detail.arg_list) + ) + + db_name: Optional[str] = self._get_db_name(arguments[0]) + if db_name is None: + return full_table_names + + schema_name: str = cast( + IdentifierAccessor, data_access_func_detail.identifier_accessor + ).items["Schema"] + table_name: str = cast( + IdentifierAccessor, + cast(IdentifierAccessor, data_access_func_detail.identifier_accessor).next, + ).items["Name"] + full_table_names.append(f"{db_name}.{schema_name}.{table_name}") return full_table_names @@ -437,41 +537,39 @@ class SnowflakeTableFullNameCreator(AbstractTableFullNameCreator): def get_platform_pair(self) -> DataPlatformPair: return SupportedDataPlatform.SNOWFLAKE.value - def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: - full_table_names: List[str] = [] + def get_full_table_names( + self, data_access_func_detail: DataAccessFunctionDetail + ) -> List[str]: - LOGGER.debug("Processing Snowflake token-dict %s", token_dict) + LOGGER.debug("Processing Snowflake function detail %s", data_access_func_detail) + # First is database name + db_name: str = 
data_access_func_detail.identifier_accessor.items["Name"] # type: ignore + # Second is schema name + schema_name: str = cast( + IdentifierAccessor, data_access_func_detail.identifier_accessor.next # type: ignore + ).items["Name"] + # Third is table name + table_name: str = cast( + IdentifierAccessor, data_access_func_detail.identifier_accessor.next.next # type: ignore + ).items["Name"] - data_access_dict: Dict[str, Any] = list(token_dict.values())[0] - del data_access_dict["arg_list"] + full_table_name: str = f"{db_name}.{schema_name}.{table_name}" - for source in data_access_dict: - for db_its in data_access_dict[source]["item_selectors"]: - db_name: str = db_its["items"]["Name"] - for schema_its in data_access_dict[source][db_its["assigned_to"]][ - "item_selectors" - ]: - schema_name: str = schema_its["items"]["Name"] - for table_its in data_access_dict[source][db_its["assigned_to"]][ - schema_its["assigned_to"] - ]["item_selectors"]: - table_name: str = table_its["items"]["Name"] - full_table_names.append(f"{db_name}.{schema_name}.{table_name}") + LOGGER.debug("Snowflake full-table-name %s", full_table_name) - LOGGER.debug("Snowflake full-table-name %s", full_table_names) - - return full_table_names + return [full_table_name] class NativeQueryTableFullNameCreator(AbstractTableFullNameCreator): def get_platform_pair(self) -> DataPlatformPair: return SupportedDataPlatform.SNOWFLAKE.value - def get_full_table_names(self, token_dict: Dict[str, Any]) -> List[str]: + def get_full_table_names( + self, data_access_func_detail: DataAccessFunctionDetail + ) -> List[str]: full_table_names: List[str] = [] - data_access_dict: Dict[str, Any] = list(token_dict.values())[0] t1: Tree = cast( - Tree, tree_function.first_arg_list_func(data_access_dict["arg_list"]) + Tree, tree_function.first_arg_list_func(data_access_func_detail.arg_list) ) flat_argument_list: List[Tree] = tree_function.flat_argument_list(t1) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py index b6ab6b5261cf3..aac946d9b7987 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py @@ -156,3 +156,4 @@ def flat_argument_list(tree: Tree) -> List[Tree]: first_identifier_func = partial(get_first_rule, rule="identifier") first_invoke_expression_func = partial(get_first_rule, rule="invoke_expression") first_type_expression_func = partial(get_first_rule, rule="type_expression") +first_list_expression_func = partial(get_first_rule, rule="list_expression") diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule index 2f84d2cf6365f..fe48cad5d08db 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule @@ -428,7 +428,7 @@ each_expression_body: function_body let_expression: "let" NEWLINE WS_INLINE? variable_list WS_INLINE? NEWLINE? in_expression -in_expression: "in" NEWLINE? WS_INLINE NEWLINE? expression +in_expression: "in" NEWLINE? WS_INLINE? NEWLINE? expression variable_list: variable | variable NEWLINE? WS_INLINE? "," NEWLINE? WS_INLINE? 
variable_list diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index cea52c6703bb1..8074f3ff9a532 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -27,6 +27,7 @@ 'let\n Source = PostgreSQL.Database("localhost" , "mics" ),\n public_order_date = Source{[Schema="public",Item="order_date"]}[Data] \n in \n public_order_date', 'let\n Source = Oracle.Database("localhost:1521/salesdb.GSLAB.COM", [HierarchicalNavigation=true]), HR = Source{[Schema="HR"]}[Data], EMPLOYEES1 = HR{[Name="EMPLOYEES"]}[Data] \n in EMPLOYEES1', 'let\n Source = Sql.Database("localhost", "library"),\n dbo_book_issue = Source{[Schema="dbo",Item="book_issue"]}[Data]\n in dbo_book_issue', + 'let\n Source = Snowflake.Databases("xaa48144.snowflakecomputing.com","GSL_TEST_WH",[Role="ACCOUNTADMIN"]),\n GSL_TEST_DB_Database = Source{[Name="GSL_TEST_DB",Kind="Database"]}[Data],\n PUBLIC_Schema = GSL_TEST_DB_Database{[Name="PUBLIC",Kind="Schema"]}[Data],\n SALES_FORECAST_Table = PUBLIC_Schema{[Name="SALES_FORECAST",Kind="Table"]}[Data],\n SALES_ANALYST_Table = PUBLIC_Schema{[Name="SALES_ANALYST",Kind="Table"]}[Data],\n RESULT = Table.Combine({SALES_FORECAST_Table, SALES_ANALYST_Table})\n\nin\n RESULT', ] @@ -285,5 +286,58 @@ def test_native_query_disabled(): data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( table, reporter, native_query_enabled=False ) - assert len(data_platform_tables) == 0 + + +def test_multi_source_table(): + + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=M_QUERIES[12], # 1st index has the native query + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, reporter, native_query_enabled=False + ) + + assert len(data_platform_tables) == 2 + assert data_platform_tables[0].full_name == "mics.public.order_date" + assert ( + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.POSTGRES_SQL.value.powerbi_data_platform_name + ) + + assert data_platform_tables[1].full_name == "GSL_TEST_DB.PUBLIC.SALES_ANALYST_VIEW" + assert ( + data_platform_tables[1].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.SNOWFLAKE.value.powerbi_data_platform_name + ) + + +def test_table_combine(): + table: PowerBiAPI.Table = PowerBiAPI.Table( + expression=M_QUERIES[16], # 1st index has the native query + name="virtual_order_table", + full_name="OrderDataSet.virtual_order_table", + ) + + reporter = PowerBiDashboardSourceReport() + + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, reporter + ) + + assert len(data_platform_tables) == 2 + assert data_platform_tables[0].full_name == "GSL_TEST_DB.PUBLIC.SALES_FORECAST" + assert ( + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.SNOWFLAKE.value.powerbi_data_platform_name + ) + + assert data_platform_tables[1].full_name == "GSL_TEST_DB.PUBLIC.SALES_ANALYST" + assert ( + data_platform_tables[1].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.SNOWFLAKE.value.powerbi_data_platform_name + ) From 1e2dc90db689fdbb5077690a8f6575638f44dd40 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 26 Dec 2022 21:45:29 +0530 Subject: [PATCH 42/53] 
remove un-wanted code --- .../source/powerbi/m_query/resolver.py | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index 592e937e1257c..da5740539fd05 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -216,31 +216,6 @@ def _create_or_update_identifier_accessor( key_vs_value: Dict[str, Any], ) -> IdentifierAccessor: - # def create_item_selector(items: Dict[str, Any], _next: IdentifierAccessor): - # return ItemSelector( - # items=items, - # next=_next, - # ) - # - # def update_identifier_accessor(node: IdentifierAccessor, identifier: str, items: Dict[str, Any]) -> bool: - # flag: bool = False - # if node.identifier == identifier: - # node.item_selectors.append( - # create_item_selector( - # items=items - # ) - # ) - # return True - # - # for item_selector in node.item_selectors: - # if item_selector.next is None: - # continue - # flag = update_identifier_accessor(item_selector.next, identifier, items) - # if flag is True: - # break - # - # return flag - # It is first identifier_accessor if identifier_accessor is None: return IdentifierAccessor( From 6cb46caca1cf841c04b3d718eb7046bfab948c89 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 26 Dec 2022 21:50:59 +0530 Subject: [PATCH 43/53] Add new line --- .../src/datahub/ingestion/source/powerbi/m_query/resolver.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index da5740539fd05..0dfac5767426b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -399,12 +399,15 @@ def two_level_access_pattern( return full_table_names db_name: str = arguments[1] + schema_name: str = cast( IdentifierAccessor, data_access_func_detail.identifier_accessor ).items["Schema"] + table_name: str = cast( IdentifierAccessor, data_access_func_detail.identifier_accessor ).items["Item"] + full_table_names.append(f"{db_name}.{schema_name}.{table_name}") LOGGER.debug("PostgreSQL full-table-names = %s", full_table_names) @@ -499,10 +502,12 @@ def get_full_table_names( schema_name: str = cast( IdentifierAccessor, data_access_func_detail.identifier_accessor ).items["Schema"] + table_name: str = cast( IdentifierAccessor, cast(IdentifierAccessor, data_access_func_detail.identifier_accessor).next, ).items["Name"] + full_table_names.append(f"{db_name}.{schema_name}.{table_name}") return full_table_names From 63b9b07a076e56e50a3c209575bdb69fbff7ad04 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 26 Dec 2022 22:13:41 +0530 Subject: [PATCH 44/53] review comments --- metadata-ingestion/docs/sources/powerbi/powerbi_pre.md | 2 +- .../src/datahub/ingestion/source/powerbi/proxy.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md index 24f7b92cf8998..c71c46700903f 100644 --- a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md +++ b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md @@ -29,7 +29,7 @@ PowerBI Source supports M-Query expression for below 
listed PowerBI Data Sources 1. Snowflake 2. Oracle 3. PostgreSQL -4. MS-SQL +4. Microsoft SQL Server Native SQL query parsing is only supported for `Snowflake` data-source and only first table from `FROM` clause will be ingested as upstream table. Advance SQL construct like JOIN and SUB-QUERIES in `FROM` clause are not supported. diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py index a7e027551290a..e243b263c5da6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py @@ -269,7 +269,7 @@ def __get_users(self, workspace_id: str, entity: str, _id: str) -> List[User]: return users - def __get_report( + def _get_report( self, workspace_id: str, report_id: str ) -> Optional["PowerBiAPI.Report"]: """ @@ -527,7 +527,7 @@ def new_dataset_or_report(tile_instance: Any) -> dict: else None ), "report": ( - self.__get_report( + self._get_report( workspace_id=workspace.id, report_id=tile_instance.get("reportId"), ) From 6b7470c1a2706f1cf929fa9598b1e87b0c182062 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 26 Dec 2022 22:14:35 +0530 Subject: [PATCH 45/53] Review comments --- .../src/datahub/ingestion/source/powerbi/proxy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py index e243b263c5da6..dc7c0dbfac0e1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py @@ -222,7 +222,7 @@ def __get_users(self, workspace_id: str, entity: str, _id: str) -> List[User]: users: List[PowerBiAPI.User] = [] if self.__config.extract_ownership is False: LOGGER.info( - "ExtractOwnership capabilities is disabled from configuration and hence returning empty users list" + "Extract ownership capabilities is disabled from configuration and hence returning empty users list" ) return users From b378151ad0e562ae17222d0b49675b17951fa9a4 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Mon, 26 Dec 2022 22:29:12 +0530 Subject: [PATCH 46/53] rename methods --- .../ingestion/source/powerbi/powerbi.py | 140 ++++++++++-------- 1 file changed, 75 insertions(+), 65 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 61119a8658d58..e3714ecd4eb7f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -114,7 +114,7 @@ def new_mcp( aspect=aspect, ) - def __to_work_unit( + def _to_work_unit( self, mcp: MetadataChangeProposalWrapper ) -> EquableMetadataWorkUnit: return Mapper.EquableMetadataWorkUnit( @@ -126,7 +126,67 @@ def __to_work_unit( mcp=mcp, ) - def __to_datahub_dataset( + def extract_lineage( + self, table: PowerBiAPI.Table, ds_urn: str + ) -> List[MetadataChangeProposalWrapper]: + mcps: List[MetadataChangeProposalWrapper] = [] + + upstreams: List[UpstreamClass] = [] + upstream_tables: List[resolver.DataPlatformTable] = parser.get_upstream_tables( + table, self.__reporter + ) + + for upstream_table in upstream_tables: + if ( + upstream_table.data_platform_pair.powerbi_data_platform_name + not in self.__config.dataset_type_mapping.keys() + ): + LOGGER.debug("Skipping upstream 
table for %s", ds_urn) + continue + + platform: Union[str, PlatformDetail] = self.__config.dataset_type_mapping[ + upstream_table.data_platform_pair.powerbi_data_platform_name + ] + + platform_name: str = ( + upstream_table.data_platform_pair.datahub_data_platform_name + ) + + platform_instance_name: Optional[str] = None + platform_env: str = DEFAULT_ENV + # Determine if PlatformDetail is provided + if isinstance(platform, PlatformDetail): + platform_instance_name = cast( + PlatformDetail, platform + ).platform_instance + platform_env = cast(PlatformDetail, platform).env + + upstream_urn = builder.make_dataset_urn_with_platform_instance( + platform=platform_name, + platform_instance=platform_instance_name, + env=platform_env, + name=self.lineage_urn_to_lowercase(upstream_table.full_name), + ) + + upstream_table_class = UpstreamClass( + upstream_urn, + DatasetLineageTypeClass.TRANSFORMED, + ) + upstreams.append(upstream_table_class) + + if len(upstreams) > 0: + upstream_lineage = UpstreamLineageClass(upstreams=upstreams) + mcp = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=ds_urn, + aspect=upstream_lineage, + ) + mcps.append(mcp) + + return mcps + + def to_datahub_dataset( self, dataset: Optional[PowerBiAPI.PowerBIDataset] ) -> List[MetadataChangeProposalWrapper]: """ @@ -173,61 +233,11 @@ def __to_datahub_dataset( dataset_mcps.extend([info_mcp, status_mcp]) if self.__config.extract_lineage is True: - # Check if upstreams table is available, parse them and create dataset URN for each upstream table - upstreams: List[UpstreamClass] = [] - upstream_tables: List[ - resolver.DataPlatformTable - ] = parser.get_upstream_tables(table, self.__reporter) - for upstream_table in upstream_tables: - if ( - upstream_table.data_platform_pair.powerbi_data_platform_name - not in self.__config.dataset_type_mapping.keys() - ): - LOGGER.debug("Skipping upstream table for %s", ds_urn) - continue - - platform: Union[ - str, PlatformDetail - ] = self.__config.dataset_type_mapping[ - upstream_table.data_platform_pair.powerbi_data_platform_name - ] - platform_name: str = ( - upstream_table.data_platform_pair.datahub_data_platform_name - ) - platform_instance_name: Optional[str] = None - platform_env: str = DEFAULT_ENV - # Determine if PlatformDetail is provided - if isinstance(platform, PlatformDetail): - platform_instance_name = cast( - PlatformDetail, platform - ).platform_instance - platform_env = cast(PlatformDetail, platform).env - - upstream_urn = builder.make_dataset_urn_with_platform_instance( - platform=platform_name, - platform_instance=platform_instance_name, - env=platform_env, - name=self.lineage_urn_to_lowercase(upstream_table.full_name), - ) - upstream_table_class = UpstreamClass( - upstream_urn, - DatasetLineageTypeClass.TRANSFORMED, - ) - upstreams.append(upstream_table_class) - - if len(upstreams) > 0: - upstream_lineage = UpstreamLineageClass(upstreams=upstreams) - mcp = MetadataChangeProposalWrapper( - entityType="dataset", - changeType=ChangeTypeClass.UPSERT, - entityUrn=ds_urn, - aspect=upstream_lineage, - ) - dataset_mcps.extend([mcp]) + dataset_mcps.extend(self.extract_lineage(table, ds_urn)) return dataset_mcps - def __to_datahub_chart( + def to_datahub_chart_mcp( self, tile: PowerBiAPI.Tile, ds_mcps: List[MetadataChangeProposalWrapper] ) -> List[MetadataChangeProposalWrapper]: """ @@ -306,7 +316,7 @@ def to_urn_set(self, mcps: List[MetadataChangeProposalWrapper]) -> List[str]: ] ) - def __to_datahub_dashboard( + def 
to_datahub_dashboard_mcp(
         self,
         dashboard: PowerBiAPI.Dashboard,
         chart_mcps: List[MetadataChangeProposalWrapper],
@@ -485,9 +495,9 @@ def to_datahub_chart(
             if tile is None:
                 continue
             # First convert the dataset to MCP, because dataset mcp is used in input attribute of chart mcp
-            dataset_mcps = self.__to_datahub_dataset(tile.dataset)
+            dataset_mcps = self.to_datahub_dataset(tile.dataset)
             # Now convert tile to chart MCP
-            chart_mcp = self.__to_datahub_chart(tile, dataset_mcps)
+            chart_mcp = self.to_datahub_chart_mcp(tile, dataset_mcps)
 
             ds_mcps.extend(dataset_mcps)
             chart_mcps.extend(chart_mcp)
@@ -514,7 +524,7 @@ def to_datahub_work_units(
         # Lets convert dashboard to datahub dashboard
         dashboard_mcps: List[
             MetadataChangeProposalWrapper
-        ] = self.__to_datahub_dashboard(dashboard, chart_mcps, user_mcps)
+        ] = self.to_datahub_dashboard_mcp(dashboard, chart_mcps, user_mcps)
 
         # Now add MCPs in sequence
         mcps.extend(ds_mcps)
@@ -523,11 +533,11 @@ def to_datahub_work_units(
         mcps.extend(dashboard_mcps)
 
         # Convert MCP to work_units
-        work_units = map(self.__to_work_unit, mcps)
+        work_units = map(self._to_work_unit, mcps)
         # Return set of work_unit
         return deduplicate_list([wu for wu in work_units if wu is not None])
 
-    def __pages_to_chart(
+    def pages_to_chart(
         self, pages: List[PowerBiAPI.Page], ds_mcps: List[MetadataChangeProposalWrapper]
     ) -> List[MetadataChangeProposalWrapper]:
 
@@ -588,7 +598,7 @@ def to_chart_mcps(
 
         return chart_mcps
 
-    def __report_to_dashboard(
+    def report_to_dashboard(
         self,
         workspace_name: str,
         report: PowerBiAPI.Report,
@@ -701,11 +711,11 @@ def report_to_datahub_work_units(
         # Convert user to CorpUser
         user_mcps = self.to_datahub_users(report.users)
         # Convert pages to charts. A report has single dataset and same dataset used in pages to create visualization
-        ds_mcps = self.__to_datahub_dataset(report.dataset)
-        chart_mcps = self.__pages_to_chart(report.pages, ds_mcps)
+        ds_mcps = self.to_datahub_dataset(report.dataset)
+        chart_mcps = self.pages_to_chart(report.pages, ds_mcps)
 
         # Let's convert report to datahub dashboard
-        report_mcps = self.__report_to_dashboard(
+        report_mcps = self.report_to_dashboard(
             workspace.name, report, chart_mcps, user_mcps
         )
 
@@ -716,7 +726,7 @@ def report_to_datahub_work_units(
         mcps.extend(report_mcps)
 
         # Convert MCP to work_units
-        work_units = map(self.__to_work_unit, mcps)
+        work_units = map(self._to_work_unit, mcps)
 
         return work_units
 

From 0bec288bfd62be1eac160e22cfbb0b0654992dd1 Mon Sep 17 00:00:00 2001
From: MohdSiddique Bagwan
Date: Tue, 27 Dec 2022 23:15:30 +0530
Subject: [PATCH 47/53] updated doc

---
 .../docs/sources/powerbi/powerbi_pre.md       | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md
index c71c46700903f..d6655dadc2642 100644
--- a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md
+++ b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md
@@ -20,6 +20,11 @@ See the
 If Tile is created from report then Chart.externalUrl is set to Report.webUrl.
 
 ## Lineage
+
+This source extracts table lineage for tables present in Power BI Datasets. Let's consider a PowerBI Dataset `SALES_REPORT` with a PostgreSQL database configured as the data-source of the `SALES_REPORT` dataset.
+
+If the `SALES_REPORT` PowerBI Dataset has a table `SALES_ANALYSIS` which is backed by the `SALES_ANALYSIS_VIEW` view of that PostgreSQL database, then `SALES_ANALYSIS_VIEW` will appear as the upstream dataset of the `SALES_ANALYSIS` table.
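+
+For illustration only, the M-Query expression backing such a table usually has the shape sketched below; the host, database, schema, and view names here are placeholders rather than values taken from a real deployment:
+
+```shell
+let
+Source = PostgreSQL.Database("localhost", "sales_db"),
+SALES_ANALYSIS = Source{[Schema="public",Item="SALES_ANALYSIS_VIEW"]}[Data]
+in
+SALES_ANALYSIS
+```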
+
 
 You can control table lineage ingestion using `extract_lineage` configuration parameter, by default it is set to `true`.
 
 PowerBI Source extracts the lineage information by parsing PowerBI M-Query expression.
@@ -62,3 +67,31 @@
 in
   #"Added Conditional Column"
 ```
+## M-Query Patterns Supported For Lineage Extraction
+Let's consider an M-Query that combines two PostgreSQL tables. Such an M-Query can be written using either of the patterns below.
+
+**Pattern-1**
+
+```shell
+let
+Source = PostgreSQL.Database("localhost", "book_store"),
+book_date = Source{[Schema="public",Item="book"]}[Data],
+issue_history = Source{[Schema="public",Item="issue_history"]}[Data],
+combine_result = Table.Combine({book_date, issue_history})
+in
+combine_result
+```
+
+**Pattern-2**
+
+```shell
+let
+Source = PostgreSQL.Database("localhost", "book_store"),
+combine_result = Table.Combine({Source{[Schema="public",Item="book"]}[Data], Source{[Schema="public",Item="issue_history"]}[Data]})
+in
+combine_result
+```
+
+`Pattern-2` is *not* supported for upstream table lineage extraction because it passes nested item-selectors i.e. {Source{[Schema="public",Item="book"]}[Data], Source{[Schema="public",Item="issue_history"]}[Data]} as the argument to the M-Query table function i.e. Table.Combine
+
+`Pattern-1` is supported because it first assigns each table from the schema to a variable and then uses those variables in the M-Query table function i.e. Table.Combine

From 7ce75dc399ba6e88d9f2143b2a3edface8bcdd93 Mon Sep 17 00:00:00 2001
From: MohdSiddique Bagwan
Date: Wed, 28 Dec 2022 12:13:01 +0530
Subject: [PATCH 48/53] support join in native query

---
 .../powerbi/m_query/native_sql_parser.py      | 17 +++---
 .../source/powerbi/m_query/parser.py          |  1 -
 .../source/powerbi/m_query/resolver.py        |  2 +
 .../ingestion/source/powerbi/powerbi.py       |  6 +-
 .../powerbi/golden_test_lineage.json          | 58 ++++++++++++++++++-
 .../tests/integration/powerbi/test_powerbi.py | 13 +++++
 6 files changed, 84 insertions(+), 13 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py
index e64c3b77cff93..cfb3b4769722e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py
@@ -18,10 +18,8 @@ def remove_special_characters(native_query: str) -> str:
 def get_tables(native_query: str) -> List[str]:
     native_query = remove_special_characters(native_query)
     LOGGER.debug("Processing query = %s", native_query)
-    # As per current use-case, we are extracting only single table from "from"
     tables: List[str] = []
     parsed = sqlparse.parse(native_query)[0]
-
     tokens: List[sqlparse.sql.Token] = list(parsed.tokens)
     length: int = len(tokens)
     from_index: int = -1
@@ -34,17 +32,16 @@ def get_tables(native_query: str) -> List[str]:
             from_index = index + 1
             break
 
-    table_name = None
-
-    while from_index < length:
+    # Collect all identifiers after the FROM clause until we reach the end or encounter a WHERE clause
+    while (
+        from_index < length
+        and isinstance(tokens[from_index], sqlparse.sql.Where) is not True
+    ):
         LOGGER.debug("%s=%s", tokens[from_index].value, tokens[from_index].ttype)
         LOGGER.debug("Type=%s", type(tokens[from_index]))
         if isinstance(tokens[from_index], sqlparse.sql.Identifier):
-            table_name = tokens[from_index].value
-            break
+            # Split on the as keyword, take the table name from the 0th position, and 
strip any spaces + tables.append(tokens[from_index].value.split("as")[0].strip()) from_index = from_index + 1 - if table_name is not None: - tables.append(table_name) - return tables diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py index 35af1fb89f3b2..1731fa250e0dd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py @@ -43,7 +43,6 @@ def get_upstream_tables( try: parse_tree: Tree = _parse_expression(table.expression) - print(parse_tree.pretty()) valid, message = validator.validate_parse_tree( parse_tree, native_query_enabled=native_query_enabled ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index 0dfac5767426b..77370a4f07727 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -590,6 +590,8 @@ def get_full_table_names( "Skipping table (%s) as it is not as per full_table_name format", table, ) + continue + full_table_names.append(table) return full_table_names diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index e3714ecd4eb7f..a6f8a5c211cf7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -141,7 +141,11 @@ def extract_lineage( upstream_table.data_platform_pair.powerbi_data_platform_name not in self.__config.dataset_type_mapping.keys() ): - LOGGER.debug("Skipping upstream table for %s", ds_urn) + LOGGER.debug( + "Skipping upstream table for %s. 
The platform (%s) is not part of dataset_type_mapping", + ds_urn, + upstream_table.data_platform_pair.powerbi_data_platform_name, + ) continue platform: Union[str, PlatformDetail] = self.__config.dataset_type_mapping[ diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json index d59d38b7d17a9..14a81cedf6db1 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json @@ -111,6 +111,62 @@ "runId": "powerbi-lineage-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query-with-join\", \"description\": \"snowflake native-query-with-join\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.GSL_TEST_DB.PUBLIC.SALES_ANALYST,PROD)\", \"type\": \"TRANSFORMED\"}, {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.GSL_TEST_DB.PUBLIC.SALES_FORECAST,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.GSL_TEST_DB.PUBLIC.SALES_ANALYST,PROD)\", \"type\": \"TRANSFORMED\"}, {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.GSL_TEST_DB.PUBLIC.SALES_FORECAST,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", @@ -369,7 +425,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": 
\"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index d6ae1b033b10c..4afc112f1b2d1 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -316,6 +316,19 @@ def register_mock_api(request_mock): } ], }, + { + "name": "snowflake native-query-with-join", + "source": [ + { + "expression": 'let\n Source = Value.NativeQuery(Snowflake.Databases("xaa48144.snowflakecomputing.com","GSL_TEST_WH",[Role="ACCOUNTADMIN"]){[Name="GSL_TEST_DB"]}[Data], "select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, \'mo\')", null, [EnableFolding=true])\nin\n Source', + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", + } + ], + }, { "name": "job-history", "source": [ From 383697e900abab7820181eac9870aa879b524168 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 28 Dec 2022 12:29:30 +0530 Subject: [PATCH 49/53] integration test fix for native query --- .../golden_test_disabled_ownership.json | 30 ++++++++- .../powerbi/golden_test_ingest.json | 32 +++++++++- .../golden_test_lower_case_urn_ingest.json | 30 ++++++++- .../powerbi/golden_test_report.json | 62 ++++++++++++++++++- .../golden_test_scan_all_workspaces.json | 
30 ++++++++- 5 files changed, 176 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json index 528477ca3d945..4590fef410601 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json @@ -83,6 +83,34 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query-with-join\", \"description\": \"snowflake native-query-with-join\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", @@ -201,7 +229,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": 
\"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json index 4646baa3ad141..c0568fd7385f4 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json @@ -83,6 +83,34 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query-with-join\", \"description\": \"snowflake native-query-with-join\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", @@ -285,7 +313,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": 
\"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { @@ -433,4 +461,4 @@ "runId": "powerbi-test" } } -] +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json index 2eabb5dcc45f1..fdb243a0e727d 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json @@ -83,6 +83,34 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query-with-join\", \"description\": \"snowflake native-query-with-join\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", @@ -285,7 +313,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", 
\"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_testtable,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json index 9092d5bc6ea7f..9e0a4f348d00d 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json @@ -83,6 +83,34 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query-with-join\", \"description\": \"snowflake native-query-with-join\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", @@ -285,7 +313,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": 
\"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { @@ -517,6 +545,34 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query-with-join\", \"description\": \"snowflake native-query-with-join\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", @@ -663,7 +719,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"order\": \"0\"}, \"title\": \"ReportSection\", \"description\": \"Regional Sales Analysis\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", + "value": "{\"customProperties\": {\"order\": \"0\"}, \"title\": \"ReportSection\", \"description\": \"Regional Sales Analysis\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, 
{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { @@ -691,7 +747,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"order\": \"1\"}, \"title\": \"ReportSection1\", \"description\": \"Geographic Analysis\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", + "value": "{\"customProperties\": {\"order\": \"1\"}, \"title\": \"ReportSection1\", \"description\": \"Geographic Analysis\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json b/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json index 255a907e39b8f..14e47301af7a0 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json @@ -83,6 +83,34 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"snowflake native-query-with-join\", \"description\": \"snowflake native-query-with-join\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + 
"value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", @@ -201,7 +229,7 @@ "changeType": "UPSERT", "aspectName": "chartInfo", "aspect": { - "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", + "value": "{\"customProperties\": {\"datasetId\": \"05169CD2-E713-41E6-9600-1D8066D95445\", \"reportId\": \"\", \"datasetWebUrl\": \"http://localhost/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/details\", \"createdFrom\": \"Dataset\"}, \"title\": \"test_tile\", \"description\": \"test_tile\", \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"inputs\": [{\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)\"}, {\"string\": \"urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)\"}]}", "contentType": "application/json" }, "systemMetadata": { From 1efcb98a88c04903cc487418c9285812cc00d000 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 28 Dec 2022 12:29:54 +0530 Subject: [PATCH 50/53] native sql query unit test --- .../powerbi/test_native_sql_parser.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 metadata-ingestion/tests/integration/powerbi/test_native_sql_parser.py diff --git a/metadata-ingestion/tests/integration/powerbi/test_native_sql_parser.py b/metadata-ingestion/tests/integration/powerbi/test_native_sql_parser.py new file mode 100644 index 0000000000000..53e184515c1d8 --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi/test_native_sql_parser.py @@ -0,0 +1,21 @@ +from typing import List + +from datahub.ingestion.source.powerbi.m_query import native_sql_parser + + +def test_join(): + query: str = "select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on 
A.name = B.name where startswith(A.name, 'mo')" + tables: List[str] = native_sql_parser.get_tables(query) + + assert len(tables) == 2 + assert tables[0] == "GSL_TEST_DB.PUBLIC.SALES_ANALYST" + assert tables[1] == "GSL_TEST_DB.PUBLIC.SALES_FORECAST" + + +def test_simple_from(): + query: str = "SELECT#(lf)concat((UPPER(REPLACE(SELLER,'-',''))), MONTHID) as AGENT_KEY,#(lf)concat((UPPER(REPLACE(CLIENT_DIRECTOR,'-',''))), MONTHID) as CD_AGENT_KEY,#(lf) *#(lf)FROM#(lf)OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4" + + tables: List[str] = native_sql_parser.get_tables(query) + + assert len(tables) == 1 + assert tables[0] == "OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_APS_SME_UNITS_V4" From fd911105ebfacf4d867b11b9f800a1511109a8e8 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 28 Dec 2022 12:57:31 +0530 Subject: [PATCH 51/53] review comment --- .../source/powerbi/m_query/parser.py | 2 +- .../datahub/ingestion/source/powerbi/proxy.py | 21 ++++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py index 1731fa250e0dd..7ea28d5b579fa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py @@ -38,7 +38,7 @@ def get_upstream_tables( native_query_enabled: bool = True, ) -> List[resolver.DataPlatformTable]: if table.expression is None: - reporter.report_warning(table.full_name, "Expression is none") + LOGGER.debug(table.full_name, "Expression is none") return [] try: diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py index dc7c0dbfac0e1..f6998f57fc0b1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py @@ -752,26 +752,28 @@ def create_scan_job(): LOGGER.info("Scan id({})".format(id)) return id - def wait_for_scan_to_complete(scan_id: str, timeout: int) -> Any: - """ - Poll the PowerBi service for workspace scan to complete - """ - minimum_sleep = 3 + def calculate_max_trial(minimum_sleep: int, timeout: int) -> int: if timeout < minimum_sleep: LOGGER.info( f"Setting timeout to minimum_sleep time {minimum_sleep} seconds" ) timeout = minimum_sleep - max_trial = timeout // minimum_sleep + return timeout // minimum_sleep + + def wait_for_scan_to_complete(scan_id: str, timeout: int) -> Any: + """ + Poll the PowerBi service for workspace scan to complete + """ + minimum_sleep = 3 + max_trial: int = calculate_max_trial(minimum_sleep, timeout) LOGGER.info(f"Max trial {max_trial}") + scan_get_endpoint = PowerBiAPI.API_ENDPOINTS[Constant.SCAN_GET] scan_get_endpoint = scan_get_endpoint.format( POWERBI_ADMIN_BASE_URL=PowerBiAPI.ADMIN_BASE_URL, SCAN_ID=scan_id ) - LOGGER.info(f"Hitting URL={scan_get_endpoint}") - trail = 1 while True: LOGGER.info(f"Trial = {trail}") @@ -781,9 +783,7 @@ def wait_for_scan_to_complete(scan_id: str, timeout: int) -> Any: ) if res.status_code != 200: message = f"API({scan_get_endpoint}) return error code {res.status_code} for scan id({scan_id})" - LOGGER.warning(message) - raise ConnectionError(message) if res.json()["status"].upper() == "Succeeded".upper(): @@ -792,6 +792,7 @@ def wait_for_scan_to_complete(scan_id: str, timeout: int) -> Any: if trail == max_trial: break + LOGGER.info(f"Sleeping for 
{minimum_sleep} seconds") sleep(minimum_sleep) trail += 1 From 97b8b7f8ad57941e212e3c3b4109b32a28ea1f51 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 28 Dec 2022 13:36:30 +0530 Subject: [PATCH 52/53] updated config --- .../src/datahub/ingestion/source/powerbi/config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index de9345fff18b4..4892fa37e2fe9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -1,6 +1,6 @@ import logging from dataclasses import dataclass, field as dataclass_field -from typing import Dict, List, Union, Optional +from typing import Dict, List, Optional, Union import pydantic from pydantic import validator @@ -103,8 +103,8 @@ class PowerBiAPIConfig(EnvBasedSourceConfigBase): tenant_id: str = pydantic.Field(description="PowerBI tenant identifier") # PowerBi workspace identifier workspace_id: Optional[str] = pydantic.Field( - description="[deprecated] Use workspace_id_pattern instead", - default=None) + description="[deprecated] Use workspace_id_pattern instead", default=None + ) # PowerBi workspace identifier workspace_id_pattern: AllowDenyPattern = pydantic.Field( default=AllowDenyPattern.allow_all(), From 9f480369f149b11bd2575dd4bc79391528aa3a19 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Thu, 29 Dec 2022 13:25:29 +0530 Subject: [PATCH 53/53] review comments --- .../ingestion/source/powerbi/config.py | 8 +- .../powerbi/m_query/native_sql_parser.py | 10 +- .../source/powerbi/m_query/parser.py | 32 ++-- .../source/powerbi/m_query/resolver.py | 66 ++++---- .../source/powerbi/m_query/tree_function.py | 10 +- .../source/powerbi/m_query/validator.py | 28 +--- .../ingestion/source/powerbi/powerbi.py | 36 ++--- .../datahub/ingestion/source/powerbi/proxy.py | 150 +++++++++--------- 8 files changed, 163 insertions(+), 177 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index 4892fa37e2fe9..c3115aae60395 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -11,7 +11,7 @@ from datahub.configuration.source_common import DEFAULT_ENV, EnvBasedSourceConfigBase from datahub.ingestion.api.source import SourceReport -LOGGER = logging.getLogger(__name__) +logger = logging.getLogger(__name__) class Constant: @@ -88,7 +88,7 @@ def report_charts_dropped(self, view: str) -> None: @dataclass class PlatformDetail: - platform_instance: str = pydantic.Field( + platform_instance: Optional[str] = pydantic.Field( default=None, description="DataHub platform instance name. It should be same as you have used in ingestion receipe of DataHub platform ingestion source of particular platform", ) @@ -174,14 +174,14 @@ def workspace_id_backward_compatibility(cls, values: Dict) -> Dict: workspace_id_pattern = values.get("workspace_id_pattern") if workspace_id_pattern == AllowDenyPattern.allow_all() and workspace_id: - LOGGER.warning( + logger.warning( "workspace_id_pattern is not set but workspace_id is set, setting workspace_id as workspace_id_pattern. workspace_id will be deprecated, please use workspace_id_pattern instead." 
) values["workspace_id_pattern"] = AllowDenyPattern( allow=[f"^{workspace_id}$"] ) elif workspace_id_pattern != AllowDenyPattern.allow_all() and workspace_id: - LOGGER.warning( + logger.warning( "workspace_id will be ignored in favour of workspace_id_pattern. workspace_id will be deprecated, please use workspace_id_pattern only." ) values.pop("workspace_id") diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py index cfb3b4769722e..5e78048629403 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py @@ -5,7 +5,7 @@ SPECIAL_CHARACTERS = ["#(lf)", "(lf)"] -LOGGER = logging.getLogger() +logger = logging.getLogger() def remove_special_characters(native_query: str) -> str: @@ -17,14 +17,14 @@ def remove_special_characters(native_query: str) -> str: def get_tables(native_query: str) -> List[str]: native_query = remove_special_characters(native_query) - LOGGER.debug("Processing query = %s", native_query) + logger.debug("Processing query = %s", native_query) tables: List[str] = [] parsed = sqlparse.parse(native_query)[0] tokens: List[sqlparse.sql.Token] = list(parsed.tokens) length: int = len(tokens) from_index: int = -1 for index, token in enumerate(tokens): - LOGGER.debug("%s=%s", token.value, token.ttype) + logger.debug("%s=%s", token.value, token.ttype) if ( token.value.lower().strip() == "from" and str(token.ttype) == "Token.Keyword" @@ -37,8 +37,8 @@ def get_tables(native_query: str) -> List[str]: from_index < length and isinstance(tokens[from_index], sqlparse.sql.Where) is not True ): - LOGGER.debug("%s=%s", tokens[from_index].value, tokens[from_index].ttype) - LOGGER.debug("Type=%s", type(tokens[from_index])) + logger.debug("%s=%s", tokens[from_index].value, tokens[from_index].ttype) + logger.debug("Type=%s", type(tokens[from_index])) if isinstance(tokens[from_index], sqlparse.sql.Identifier): # Split on as keyword and collect the table name from 0th position. 
strip any spaces tables.append(tokens[from_index].value.split("as")[0].strip()) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py index 7ea28d5b579fa..7f607b8e82005 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py @@ -1,6 +1,6 @@ import importlib.resources as pkg_resource import logging -from typing import List, cast +from typing import List, Optional, cast import lark from lark import Lark, Tree @@ -9,25 +9,35 @@ from datahub.ingestion.source.powerbi.m_query import resolver, validator from datahub.ingestion.source.powerbi.proxy import PowerBiAPI -LOGGER = logging.getLogger(__name__) +logger = logging.getLogger(__name__) +lark_parser: Optional[Lark] = None + + +def get_lark_parser(): + global lark_parser + if lark_parser is not None: + return lark_parser -def _parse_expression(expression: str) -> Tree: # Read lexical grammar as text grammar: str = pkg_resource.read_text( "datahub.ingestion.source.powerbi", "powerbi-lexical-grammar.rule" ) - # Create lark parser for the grammar text - lark_parser = Lark(grammar, start="let_expression", regex=True) + return Lark(grammar, start="let_expression", regex=True) + + +def _parse_expression(expression: str) -> Tree: + lark_parser: Lark = get_lark_parser() parse_tree: Tree = lark_parser.parse(expression) - LOGGER.debug("Parse Tree") + logger.debug("Parsing expression = %s", expression) + if ( - LOGGER.level == logging.DEBUG + logger.level == logging.DEBUG ): # Guard condition to avoid heavy pretty() function call - LOGGER.debug(parse_tree.pretty()) + logger.debug(parse_tree.pretty()) return parse_tree @@ -38,7 +48,7 @@ def get_upstream_tables( native_query_enabled: bool = True, ) -> List[resolver.DataPlatformTable]: if table.expression is None: - LOGGER.debug(table.full_name, "Expression is none") + logger.debug(table.full_name, "Expression is none") return [] try: @@ -47,11 +57,11 @@ def get_upstream_tables( parse_tree, native_query_enabled=native_query_enabled ) if valid is False: - LOGGER.debug("Validation failed: %s", cast(str, message)) + logger.debug("Validation failed: %s", cast(str, message)) reporter.report_warning(table.full_name, cast(str, message)) return [] except lark.exceptions.UnexpectedCharacters as e: - LOGGER.debug(f"Fail to parse expression {table.expression}", exc_info=e) + logger.debug(f"Fail to parse expression {table.expression}", exc_info=e) reporter.report_warning( table.full_name, f"UnSupported expression = {table.expression}" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index 77370a4f07727..b3fe9d31026be 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -14,7 +14,7 @@ ) from datahub.ingestion.source.powerbi.proxy import PowerBiAPI -LOGGER = logging.getLogger(__name__) +logger = logging.getLogger(__name__) @dataclass @@ -92,16 +92,16 @@ def get_item_selector_tokens( expression_tree ) if item_selector is None: - LOGGER.debug("Item Selector not found in tree") - LOGGER.debug(expression_tree.pretty()) + logger.debug("Item Selector not found in tree") + logger.debug(expression_tree.pretty()) return None, None identifier_tree: Optional[Tree] = 
tree_function.first_identifier_func( expression_tree ) if identifier_tree is None: - LOGGER.debug("Identifier not found in tree") - LOGGER.debug(item_selector.pretty()) + logger.debug("Identifier not found in tree") + logger.debug(item_selector.pretty()) return None, None # remove whitespaces and quotes from token @@ -126,7 +126,7 @@ def get_argument_list(invoke_expression: Tree) -> Optional[Tree]: invoke_expression ) if argument_list is None: - LOGGER.debug("First argument-list rule not found in input tree") + logger.debug("First argument-list rule not found in input tree") return None return argument_list @@ -162,7 +162,7 @@ def _process_invoke_expression( ) if first_arg_tree is None: - LOGGER.debug( + logger.debug( "Function invocation without argument in expression = %s", invoke_expression.pretty(), ) @@ -179,11 +179,11 @@ def _process_invoke_expression( first_argument ) - LOGGER.debug("Extracting token from tree %s", first_argument.pretty()) + logger.debug("Extracting token from tree %s", first_argument.pretty()) if expression is None: expression = tree_function.first_type_expression_func(first_argument) if expression is None: - LOGGER.debug( + logger.debug( "Either list_expression or type_expression is not found = %s", invoke_expression.pretty(), ) @@ -197,7 +197,7 @@ def _process_invoke_expression( tree_function.token_values(expression) ) - LOGGER.debug("Tokens in invoke expression are %s", tokens) + logger.debug("Tokens in invoke expression are %s", tokens) return tokens def _process_item_selector_expression( @@ -267,8 +267,8 @@ def internal( # Any expression after "=" sign of variable-statement rh_tree: Optional[Tree] = tree_function.first_expression_func(v_statement) if rh_tree is None: - LOGGER.debug("Expression tree not found") - LOGGER.debug(v_statement.pretty()) + logger.debug("Expression tree not found") + logger.debug(v_statement.pretty()) return None invoke_expression: Optional[ @@ -302,7 +302,7 @@ def internal( rh_tree ) if new_identifier is None or key_vs_value is None: - LOGGER.debug("Required information not found in rh_tree") + logger.debug("Required information not found in rh_tree") return None new_identifier_accessor: IdentifierAccessor = ( self._create_or_update_identifier_accessor( @@ -340,7 +340,7 @@ def resolve_to_data_platform_table_list(self) -> List[DataPlatformTable]: f_detail.data_access_function_name ) if supported_resolver is None: - LOGGER.debug( + logger.debug( "Resolver not found for the data-access-function %s", f_detail.data_access_function_name, ) @@ -383,7 +383,7 @@ def two_level_access_pattern( ) -> List[str]: full_table_names: List[str] = [] - LOGGER.debug( + logger.debug( "Processing PostgreSQL data-access function detail %s", data_access_func_detail, ) @@ -395,7 +395,7 @@ def two_level_access_pattern( ) if len(arguments) != 2: - LOGGER.debug("Expected 2 arguments, but got {%s}", len(arguments)) + logger.debug("Expected 2 arguments, but got {%s}", len(arguments)) return full_table_names db_name: str = arguments[1] @@ -410,7 +410,11 @@ def two_level_access_pattern( full_table_names.append(f"{db_name}.{schema_name}.{table_name}") - LOGGER.debug("PostgreSQL full-table-names = %s", full_table_names) + logger.debug( + "Platform(%s) full-table-names = %s", + self.get_platform_pair().datahub_data_platform_name, + full_table_names, + ) return full_table_names @@ -442,11 +446,11 @@ def get_full_table_names( if len(arguments) == 2: # It is regular case of MS-SQL - LOGGER.debug("Handling with regular case") + logger.debug("Handling with regular case") 
            return self.two_level_access_pattern(data_access_func_detail)
 
         if len(arguments) >= 4 and arguments[2] != "Query":
-            LOGGER.debug("Unsupported case is found. Second index is not the Query")
+            logger.debug("Unsupported case is found. Second index is not the Query")
             return full_table_names
 
         db_name: str = arguments[1]
@@ -462,7 +466,7 @@ def get_full_table_names(
                 f"{db_name}.{schema_and_table[0]}.{schema_and_table[1]}"
             )
 
-        LOGGER.debug("MS-SQL full-table-names %s", full_table_names)
+        logger.debug("MS-SQL full-table-names %s", full_table_names)
 
         return full_table_names
 
@@ -475,7 +479,7 @@ def _get_db_name(self, value: str) -> Optional[str]:
         error_message: str = f"The target argument ({value}) should in the format of <host-name>:<port>/<db-name>[.<domain-name>]"
         splitter_result: List[str] = value.split("/")
         if len(splitter_result) != 2:
-            LOGGER.debug(error_message)
+            logger.debug(error_message)
             return None
 
         db_name = splitter_result[1].split(".")[0]
@@ -487,7 +491,7 @@ def get_full_table_names(
     ) -> List[str]:
         full_table_names: List[str] = []
 
-        LOGGER.debug(
+        logger.debug(
             "Processing Oracle data-access function detail %s", data_access_func_detail
         )
 
@@ -521,7 +525,7 @@ def get_full_table_names(
         self, data_access_func_detail: DataAccessFunctionDetail
     ) -> List[str]:
 
-        LOGGER.debug("Processing Snowflake function detail %s", data_access_func_detail)
+        logger.debug("Processing Snowflake function detail %s", data_access_func_detail)
         # First is database name
         db_name: str = data_access_func_detail.identifier_accessor.items["Name"]  # type: ignore
         # Second is schema name
@@ -535,7 +539,7 @@ def get_full_table_names(
 
         full_table_name: str = f"{db_name}.{schema_name}.{table_name}"
 
-        LOGGER.debug("Snowflake full-table-name %s", full_table_name)
+        logger.debug("Snowflake full-table-name %s", full_table_name)
 
         return [full_table_name]
 
@@ -554,11 +558,11 @@ def get_full_table_names(
         flat_argument_list: List[Tree] = tree_function.flat_argument_list(t1)
 
         if len(flat_argument_list) != 2:
-            LOGGER.debug(
+            logger.debug(
                 "Expecting 2 argument, actual argument count is %s",
                 len(flat_argument_list),
             )
-            LOGGER.debug("Flat argument list = %s", flat_argument_list)
+            logger.debug("Flat argument list = %s", flat_argument_list)
             return full_table_names
 
         data_access_tokens: List[str] = tree_function.remove_whitespaces_from_list(
@@ -568,10 +572,10 @@ def get_full_table_names(
             data_access_tokens[0]
             != SupportedDataPlatform.SNOWFLAKE.value.powerbi_data_platform_name
         ):
-            LOGGER.debug(
+            logger.debug(
                 "Provided native-query data-platform = %s", data_access_tokens[0]
             )
-            LOGGER.debug("Only Snowflake is supported in NativeQuery")
+            logger.debug("Only Snowflake is supported in NativeQuery")
             return full_table_names
 
         # First argument is the query
@@ -586,7 +590,7 @@ def get_full_table_names(
 
         for table in native_sql_parser.get_tables(sql_query):
             if len(table.split(".")) != 3:
-                LOGGER.debug(
+                logger.debug(
                     "Skipping table (%s) as it is not as per full_table_name format",
                     table,
                 )
@@ -647,9 +651,9 @@ def get_function_names() -> List[str]:
 
     @staticmethod
     def get_resolver(function_name: str) -> Optional["SupportedResolver"]:
-        LOGGER.debug("Looking for resolver %s", function_name)
+        logger.debug("Looking for resolver %s", function_name)
         for supported_resolver in SupportedResolver:
             if function_name == supported_resolver.get_function_name():
                 return supported_resolver
-        LOGGER.debug("Looking not found for resolver %s", function_name)
+        logger.debug("Resolver not found for %s", function_name)
         return None
diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py index aac946d9b7987..c8a2807084611 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py @@ -4,7 +4,7 @@ from lark import Token, Tree -LOGGER = logging.getLogger(__name__) +logger = logging.getLogger(__name__) def get_output_variable(root: Tree) -> Optional[str]: @@ -28,13 +28,13 @@ def get_variable_statement(parse_tree: Tree, variable: str) -> Optional[Tree]: for tree in _filter: values: List[str] = token_values(tree.children[0]) actual_value: str = "".join(strip_char_from_list(values, " ")) - LOGGER.debug("Actual Value = %s", actual_value) - LOGGER.debug("Expected Value = %s", variable) + logger.debug("Actual Value = %s", actual_value) + logger.debug("Expected Value = %s", variable) if actual_value.lower() == variable.lower(): return tree - LOGGER.info("Provided variable(%s) not found in variable rule", variable) + logger.info("Provided variable(%s) not found in variable rule", variable) return None @@ -120,7 +120,7 @@ def get_all_function_name(tree: Tree) -> List[str]: _filter: Any = tree.find_data("invoke_expression") for node in _filter: - LOGGER.debug("Tree = %s", node.pretty()) + logger.debug("Tree = %s", node.pretty()) primary_expression_node: Optional[Tree] = first_primary_expression_func(node) if primary_expression_node is None: continue diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py index abe7d0e46b05a..5bb8f811fa61b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/validator.py @@ -5,33 +5,7 @@ from datahub.ingestion.source.powerbi.m_query import resolver, tree_function -LOGGER = logging.getLogger(__name__) - - -def any_one_should_present( - supported_funcs: List[str], functions: List[str] -) -> Tuple[bool, Optional[str]]: - """ - Anyone functions from supported_funcs should present in functions list - :param supported_funcs: List of function m_query module supports - :param functions: List of functions retrieved from expression - :return: True or False - """ - for f in supported_funcs: - if f in functions: - return True, None - - return False, f"Function from supported function list {supported_funcs} not found" - - -def all_function_should_be_known( - supported_funcs: List[str], functions: List[str] -) -> Tuple[bool, Optional[str]]: - for f in functions: - if f not in supported_funcs: - return False, f"Function {f} is unknown" - - return True, None +logger = logging.getLogger(__name__) def validate_parse_tree( diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index a6f8a5c211cf7..8218a7ea718eb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -52,7 +52,7 @@ from datahub.utilities.dedup_list import deduplicate_list # Logger instance -LOGGER = logging.getLogger(__name__) +logger = logging.getLogger(__name__) class Mapper: @@ -141,7 +141,7 @@ def extract_lineage( upstream_table.data_platform_pair.powerbi_data_platform_name not in 
self.__config.dataset_type_mapping.keys() ): - LOGGER.debug( + logger.debug( "Skipping upstream table for %s. The platform (%s) is not part of dataset_type_mapping", ds_urn, upstream_table.data_platform_pair.powerbi_data_platform_name, @@ -202,8 +202,8 @@ def to_datahub_dataset( if dataset is None: return dataset_mcps - LOGGER.info( - f"Converting dataset={dataset.name}(id={dataset.id}) to datahub dataset" + logger.debug( + f"Mapping dataset={dataset.name}(id={dataset.id}) to datahub dataset" ) for table in dataset.tables: @@ -214,7 +214,7 @@ def to_datahub_dataset( env=self.__config.env, ) - LOGGER.info(f"{Constant.Dataset_URN}={ds_urn}") + logger.debug(f"{Constant.Dataset_URN}={ds_urn}") # Create datasetProperties mcp ds_properties = DatasetPropertiesClass( name=table.name, description=table.name @@ -247,13 +247,13 @@ def to_datahub_chart_mcp( """ Map PowerBi tile to datahub chart """ - LOGGER.info("Converting tile {}(id={}) to chart".format(tile.title, tile.id)) + logger.info("Converting tile {}(id={}) to chart".format(tile.title, tile.id)) # Create a URN for chart chart_urn = builder.make_chart_urn( self.__config.platform_name, tile.get_urn_part() ) - LOGGER.info("{}={}".format(Constant.CHART_URN, chart_urn)) + logger.info("{}={}".format(Constant.CHART_URN, chart_urn)) ds_input: List[str] = self.to_urn_set(ds_mcps) @@ -431,9 +431,7 @@ def to_datahub_user( Map PowerBi user to datahub user """ - LOGGER.info( - f"Converting user {user.displayName}(id={user.id}) to datahub's user" - ) + logger.debug(f"Mapping user {user.displayName}(id={user.id}) to datahub's user") # Create an URN for user user_urn = builder.make_user_urn(user.get_urn_part()) @@ -493,7 +491,7 @@ def to_datahub_chart( if not tiles: return [], [] - LOGGER.info(f"Converting tiles(count={len(tiles)}) to charts") + logger.info(f"Converting tiles(count={len(tiles)}) to charts") for tile in tiles: if tile is None: @@ -515,7 +513,7 @@ def to_datahub_work_units( ) -> List[EquableMetadataWorkUnit]: mcps = [] - LOGGER.info( + logger.info( f"Converting dashboard={dashboard.displayName} to datahub dashboard" ) @@ -551,18 +549,18 @@ def pages_to_chart( if not pages: return [] - LOGGER.debug(f"Converting pages(count={len(pages)}) to charts") + logger.debug(f"Converting pages(count={len(pages)}) to charts") def to_chart_mcps( page: PowerBiAPI.Page, ds_mcps: List[MetadataChangeProposalWrapper] ) -> List[MetadataChangeProposalWrapper]: - LOGGER.debug("Converting page {} to chart".format(page.displayName)) + logger.debug("Converting page {} to chart".format(page.displayName)) # Create a URN for chart chart_urn = builder.make_chart_urn( self.__config.platform_name, page.get_urn_part() ) - LOGGER.debug("{}={}".format(Constant.CHART_URN, chart_urn)) + logger.debug("{}={}".format(Constant.CHART_URN, chart_urn)) ds_input: List[str] = self.to_urn_set(ds_mcps) @@ -710,7 +708,7 @@ def report_to_datahub_work_units( ) -> Iterable[MetadataWorkUnit]: mcps: List[MetadataChangeProposalWrapper] = [] - LOGGER.debug(f"Converting dashboard={report.name} to datahub dashboard") + logger.debug(f"Converting dashboard={report.name} to datahub dashboard") # Convert user to CorpUser user_mcps = self.to_datahub_users(report.users) @@ -787,12 +785,12 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: """ Datahub Ingestion framework invoke this method """ - LOGGER.info("PowerBi plugin execution is started") + logger.info("PowerBi plugin execution is started") # Validate dataset type mapping self.validate_dataset_type_mapping() # Fetch PowerBi workspace for 
given workspace identifier for workspace_id in self.get_workspace_ids(): - LOGGER.info(f"Scanning workspace id: {workspace_id}") + logger.info(f"Scanning workspace id: {workspace_id}") workspace = self.powerbi_client.get_workspace(workspace_id, self.reporter) for dashboard in workspace.dashboards: @@ -806,7 +804,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: except Exception as e: message = f"Error ({e}) occurred while loading dashboard {dashboard.displayName}(id={dashboard.id}) tiles." - LOGGER.exception(message, e) + logger.exception(message, e) self.reporter.report_warning(dashboard.id, message) # Convert PowerBi Dashboard and child entities to Datahub work unit to ingest into Datahub workunits = self.mapper.to_datahub_work_units(dashboard) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py index f6998f57fc0b1..f1cec25b48b73 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/proxy.py @@ -16,7 +16,7 @@ ) # Logger instance -LOGGER = logging.getLogger(__name__) +logger = logging.getLogger(__name__) class PowerBiAPI: @@ -208,9 +208,9 @@ def __init__(self, config: PowerBiAPIConfig) -> None: ) # Test connection by generating a access token - LOGGER.info("Trying to connect to {}".format(self.__get_authority_url())) + logger.info("Trying to connect to {}".format(self.__get_authority_url())) self.get_access_token() - LOGGER.info("Able to connect to {}".format(self.__get_authority_url())) + logger.info("Able to connect to {}".format(self.__get_authority_url())) def __get_authority_url(self): return "{}{}".format(PowerBiAPI.AUTHORITY, self.__config.tenant_id) @@ -221,7 +221,7 @@ def __get_users(self, workspace_id: str, entity: str, _id: str) -> List[User]: """ users: List[PowerBiAPI.User] = [] if self.__config.extract_ownership is False: - LOGGER.info( + logger.info( "Extract ownership capabilities is disabled from configuration and hence returning empty users list" ) return users @@ -234,7 +234,7 @@ def __get_users(self, workspace_id: str, entity: str, _id: str) -> List[User]: ENTITY_ID=_id, ) # Hit PowerBi - LOGGER.info(f"Request to URL={user_list_endpoint}") + logger.info(f"Request to URL={user_list_endpoint}") response = requests.get( user_list_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -242,15 +242,15 @@ def __get_users(self, workspace_id: str, entity: str, _id: str) -> List[User]: # Check if we got response from PowerBi if response.status_code != 200: - LOGGER.warning( + logger.warning( "Failed to fetch user list from power-bi. http_status=%s. 
message=%s", response.status_code, response.text, ) - LOGGER.info(f"{Constant.WorkspaceId}={workspace_id}") - LOGGER.info(f"{Constant.ENTITY}={entity}") - LOGGER.info(f"{Constant.ID}={_id}") + logger.info(f"{Constant.WorkspaceId}={workspace_id}") + logger.info(f"{Constant.ENTITY}={entity}") + logger.info(f"{Constant.ID}={_id}") raise ConnectionError("Failed to fetch the user list from the power-bi") users_dict: List[Any] = response.json()[Constant.VALUE] @@ -276,9 +276,9 @@ def _get_report( Fetch the report from PowerBi for the given report identifier """ if workspace_id is None or report_id is None: - LOGGER.info("Input values are None") - LOGGER.info(f"{Constant.WorkspaceId}={workspace_id}") - LOGGER.info(f"{Constant.ReportId}={report_id}") + logger.info("Input values are None") + logger.info(f"{Constant.WorkspaceId}={workspace_id}") + logger.info(f"{Constant.ReportId}={report_id}") return None report_get_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.REPORT_GET] @@ -289,7 +289,7 @@ def _get_report( REPORT_ID=report_id, ) # Hit PowerBi - LOGGER.info(f"Request to report URL={report_get_endpoint}") + logger.info(f"Request to report URL={report_get_endpoint}") response = requests.get( report_get_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -298,9 +298,9 @@ def _get_report( # Check if we got response from PowerBi if response.status_code != 200: message: str = "Failed to fetch report from power-bi for" - LOGGER.warning(message) - LOGGER.warning(f"{Constant.WorkspaceId}={workspace_id}") - LOGGER.warning(f"{Constant.ReportId}={report_id}") + logger.warning(message) + logger.warning(f"{Constant.WorkspaceId}={workspace_id}") + logger.warning(f"{Constant.ReportId}={report_id}") raise ConnectionError(message) response_dict = response.json() @@ -320,28 +320,28 @@ def _get_report( def get_access_token(self): if self.__access_token != "": - LOGGER.info("Returning the cached access token") + logger.debug("Returning the cached access token") return self.__access_token - LOGGER.info("Generating PowerBi access token") + logger.info("Generating PowerBi access token") auth_response = self.__msal_client.acquire_token_for_client( scopes=[PowerBiAPI.SCOPE] ) if not auth_response.get("access_token"): - LOGGER.warning( + logger.warning( "Failed to generate the PowerBi access token. Please check input configuration" ) raise ConfigurationError( "Powerbi authorization failed . Please check your input configuration." 
) - LOGGER.info("Generated PowerBi access token") + logger.info("Generated PowerBi access token") self.__access_token = "Bearer {}".format(auth_response.get("access_token")) - LOGGER.debug(f"{Constant.PBIAccessToken}={self.__access_token}") + logger.debug(f"{Constant.PBIAccessToken}={self.__access_token}") return self.__access_token @@ -366,7 +366,7 @@ def get_dashboards(self, workspace: Workspace) -> List[Dashboard]: POWERBI_BASE_URL=PowerBiAPI.BASE_URL, WORKSPACE_ID=workspace.id ) # Hit PowerBi - LOGGER.info(f"Request to URL={dashboard_list_endpoint}") + logger.info(f"Request to URL={dashboard_list_endpoint}") response = requests.get( dashboard_list_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -374,8 +374,8 @@ def get_dashboards(self, workspace: Workspace) -> List[Dashboard]: # Check if we got response from PowerBi if response.status_code != 200: - LOGGER.warning("Failed to fetch dashboard list from power-bi for") - LOGGER.warning(f"{Constant.WorkspaceId}={workspace.id}") + logger.warning("Failed to fetch dashboard list from power-bi for") + logger.warning(f"{Constant.WorkspaceId}={workspace.id}") raise ConnectionError( "Failed to fetch the dashboard list from the power-bi" ) @@ -406,9 +406,9 @@ def get_dataset(self, workspace_id: str, dataset_id: str) -> Any: Fetch the dataset from PowerBi for the given dataset identifier """ if workspace_id is None or dataset_id is None: - LOGGER.info("Input values are None") - LOGGER.info(f"{Constant.WorkspaceId}={workspace_id}") - LOGGER.info(f"{Constant.DatasetId}={dataset_id}") + logger.info("Input values are None") + logger.info(f"{Constant.WorkspaceId}={workspace_id}") + logger.info(f"{Constant.DatasetId}={dataset_id}") return None dataset_get_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.DATASET_GET] @@ -419,7 +419,7 @@ def get_dataset(self, workspace_id: str, dataset_id: str) -> Any: DATASET_ID=dataset_id, ) # Hit PowerBi - LOGGER.info(f"Request to dataset URL={dataset_get_endpoint}") + logger.info(f"Request to dataset URL={dataset_get_endpoint}") response = requests.get( dataset_get_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -428,13 +428,13 @@ def get_dataset(self, workspace_id: str, dataset_id: str) -> Any: # Check if we got response from PowerBi if response.status_code != 200: message: str = "Failed to fetch dataset from power-bi for" - LOGGER.warning(message) - LOGGER.warning(f"{Constant.WorkspaceId}={workspace_id}") - LOGGER.warning(f"{Constant.DatasetId}={dataset_id}") + logger.warning(message) + logger.warning(f"{Constant.WorkspaceId}={workspace_id}") + logger.warning(f"{Constant.DatasetId}={dataset_id}") raise ConnectionError(message) response_dict = response.json() - LOGGER.debug("datasets = {}".format(response_dict)) + logger.debug("datasets = {}".format(response_dict)) # PowerBi Always return the webURL, in-case if it is None then setting complete webURL to None instead of # None/details return PowerBiAPI.PowerBIDataset( @@ -462,7 +462,7 @@ def get_data_sources( DATASET_ID=dataset.id, ) # Hit PowerBi - LOGGER.info(f"Request to datasource URL={datasource_get_endpoint}") + logger.info(f"Request to datasource URL={datasource_get_endpoint}") response = requests.get( datasource_get_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -471,11 +471,11 @@ def get_data_sources( # Check if we got response from PowerBi if response.status_code != 200: message: str = "Failed to fetch datasource from power-bi for" - LOGGER.warning(message) - 
LOGGER.warning("{}={}".format(Constant.WorkspaceId, dataset.workspace_id)) - LOGGER.warning("{}={}".format(Constant.DatasetId, dataset.id)) - LOGGER.warning("{}={}".format(Constant.HTTP_RESPONSE_TEXT, response.text)) - LOGGER.warning( + logger.warning(message) + logger.warning("{}={}".format(Constant.WorkspaceId, dataset.workspace_id)) + logger.warning("{}={}".format(Constant.DatasetId, dataset.id)) + logger.warning("{}={}".format(Constant.HTTP_RESPONSE_TEXT, response.text)) + logger.warning( "{}={}".format(Constant.HTTP_RESPONSE_STATUS_CODE, response.status_code) ) @@ -484,14 +484,14 @@ def get_data_sources( res = response.json() value = res["value"] if len(value) == 0: - LOGGER.info( + logger.info( f"datasource is not found for dataset {dataset.name}({dataset.id})" ) return None data_sources: Dict[str, "PowerBiAPI.DataSource"] = {} - LOGGER.debug("data-sources = {}".format(value)) + logger.debug("data-sources = {}".format(value)) for datasource_dict in value: # Create datasource instance with basic detail available datasource = PowerBiAPI.DataSource( @@ -545,7 +545,7 @@ def new_dataset_or_report(tile_instance: Any) -> dict: else: report_fields["createdFrom"] = PowerBiAPI.Tile.CreatedFrom.VISUALIZATION - LOGGER.info( + logger.info( "Tile %s(%s) is created from %s", tile_instance.get("title"), tile_instance.get("id"), @@ -562,7 +562,7 @@ def new_dataset_or_report(tile_instance: Any) -> dict: DASHBOARD_ID=dashboard.id, ) # Hit PowerBi - LOGGER.info("Request to URL={}".format(tile_list_endpoint)) + logger.info("Request to URL={}".format(tile_list_endpoint)) response = requests.get( tile_list_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -570,14 +570,14 @@ def new_dataset_or_report(tile_instance: Any) -> dict: # Check if we got response from PowerBi if response.status_code != 200: - LOGGER.warning("Failed to fetch tiles list from power-bi for") - LOGGER.warning("{}={}".format(Constant.WorkspaceId, workspace.id)) - LOGGER.warning("{}={}".format(Constant.DashboardId, dashboard.id)) + logger.warning("Failed to fetch tiles list from power-bi for") + logger.warning("{}={}".format(Constant.WorkspaceId, workspace.id)) + logger.warning("{}={}".format(Constant.DashboardId, dashboard.id)) raise ConnectionError("Failed to fetch the tile list from the power-bi") # Iterate through response and create a list of PowerBiAPI.Dashboard tile_dict: List[Any] = response.json()[Constant.VALUE] - LOGGER.debug("Tile Dict = {}".format(tile_dict)) + logger.debug("Tile Dict = {}".format(tile_dict)) tiles: List[PowerBiAPI.Tile] = [ PowerBiAPI.Tile( id=instance.get("id"), @@ -598,7 +598,7 @@ def get_pages_by_report( Fetch the report from PowerBi for the given report identifier """ if workspace_id is None or report_id is None: - LOGGER.info("workspace_id or report_id is None") + logger.info("workspace_id or report_id is None") return [] pages_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.PAGE_BY_REPORT] @@ -609,7 +609,7 @@ def get_pages_by_report( REPORT_ID=report_id, ) # Hit PowerBi - LOGGER.info(f"Request to pages URL={pages_endpoint}") + logger.info(f"Request to pages URL={pages_endpoint}") response = requests.get( pages_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -618,8 +618,8 @@ def get_pages_by_report( # Check if we got response from PowerBi if response.status_code != 200: message: str = "Failed to fetch reports from power-bi for" - LOGGER.warning(message) - LOGGER.warning(f"{Constant.WorkspaceId}={workspace_id}") + logger.warning(message) + 
logger.warning(f"{Constant.WorkspaceId}={workspace_id}") raise ConnectionError(message) response_dict = response.json() @@ -640,7 +640,7 @@ def get_reports( Fetch the report from PowerBi for the given report identifier """ if workspace is None: - LOGGER.info("workspace is None") + logger.info("workspace is None") return [] report_list_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.REPORT_LIST] @@ -650,7 +650,7 @@ def get_reports( WORKSPACE_ID=workspace.id, ) # Hit PowerBi - LOGGER.info(f"Request to report URL={report_list_endpoint}") + logger.info(f"Request to report URL={report_list_endpoint}") response = requests.get( report_list_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -659,8 +659,8 @@ def get_reports( # Check if we got response from PowerBi if response.status_code != 200: message: str = "Failed to fetch reports from power-bi for" - LOGGER.warning(message) - LOGGER.warning(f"{Constant.WorkspaceId}={workspace.id}") + logger.warning(message) + logger.warning(f"{Constant.WorkspaceId}={workspace.id}") raise ConnectionError(message) response_dict = response.json() @@ -687,7 +687,7 @@ def get_reports( def get_groups(self): group_endpoint = PowerBiAPI.BASE_URL # Hit PowerBi - LOGGER.info(f"Request to get groups endpoint URL={group_endpoint}") + logger.info(f"Request to get groups endpoint URL={group_endpoint}") response = requests.get( group_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -744,17 +744,17 @@ def create_scan_job(): if res.status_code not in (200, 202): message = f"API({scan_create_endpoint}) return error code {res.status_code} for workspace id({workspace_id})" - LOGGER.warning(message) + logger.warning(message) raise ConnectionError(message) # Return Id of Scan created for the given workspace id = res.json()["id"] - LOGGER.info("Scan id({})".format(id)) + logger.info("Scan id({})".format(id)) return id def calculate_max_trial(minimum_sleep: int, timeout: int) -> int: if timeout < minimum_sleep: - LOGGER.info( + logger.info( f"Setting timeout to minimum_sleep time {minimum_sleep} seconds" ) timeout = minimum_sleep @@ -767,33 +767,33 @@ def wait_for_scan_to_complete(scan_id: str, timeout: int) -> Any: """ minimum_sleep = 3 max_trial: int = calculate_max_trial(minimum_sleep, timeout) - LOGGER.info(f"Max trial {max_trial}") + logger.info(f"Max trial {max_trial}") scan_get_endpoint = PowerBiAPI.API_ENDPOINTS[Constant.SCAN_GET] scan_get_endpoint = scan_get_endpoint.format( POWERBI_ADMIN_BASE_URL=PowerBiAPI.ADMIN_BASE_URL, SCAN_ID=scan_id ) - LOGGER.info(f"Hitting URL={scan_get_endpoint}") + logger.debug(f"Hitting URL={scan_get_endpoint}") trail = 1 while True: - LOGGER.info(f"Trial = {trail}") + logger.info(f"Trial = {trail}") res = requests.get( scan_get_endpoint, headers={Constant.Authorization: self.get_access_token()}, ) if res.status_code != 200: message = f"API({scan_get_endpoint}) return error code {res.status_code} for scan id({scan_id})" - LOGGER.warning(message) + logger.warning(message) raise ConnectionError(message) if res.json()["status"].upper() == "Succeeded".upper(): - LOGGER.info(f"Scan result is available for scan id({scan_id})") + logger.info(f"Scan result is available for scan id({scan_id})") return True if trail == max_trial: break - LOGGER.info(f"Sleeping for {minimum_sleep} seconds") + logger.info(f"Sleeping for {minimum_sleep} seconds") sleep(minimum_sleep) trail += 1 @@ -801,8 +801,8 @@ def wait_for_scan_to_complete(scan_id: str, timeout: int) -> Any: return False def get_scan_result(scan_id: str) -> 
dict: - LOGGER.info("Fetching scan result") - LOGGER.info(f"{Constant.SCAN_ID}={scan_id}") + logger.info("Fetching scan result") + logger.info(f"{Constant.SCAN_ID}={scan_id}") scan_result_get_endpoint = PowerBiAPI.API_ENDPOINTS[ Constant.SCAN_RESULT_GET ] @@ -810,7 +810,7 @@ def get_scan_result(scan_id: str) -> dict: POWERBI_ADMIN_BASE_URL=PowerBiAPI.ADMIN_BASE_URL, SCAN_ID=scan_id ) - LOGGER.info(f"Hitting URL={scan_result_get_endpoint}") + logger.debug(f"Hitting URL={scan_result_get_endpoint}") res = requests.get( scan_result_get_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -818,7 +818,7 @@ def get_scan_result(scan_id: str) -> dict: if res.status_code != 200: message = f"API({scan_result_get_endpoint}) return error code {res.status_code} for scan id({scan_id})" - LOGGER.warning(message) + logger.warning(message) raise ConnectionError(message) @@ -832,11 +832,11 @@ def json_to_dataset_map(scan_result: dict) -> dict: dataset_map: dict = {} if datasets is None or len(datasets) == 0: - LOGGER.warning( + logger.warning( f'Workspace {scan_result["name"]}({scan_result["id"]}) does not have datasets' ) - LOGGER.info("Returning empty datasets") + logger.info("Returning empty datasets") return dataset_map for dataset_dict in datasets: @@ -877,11 +877,11 @@ def init_dashboard_tiles(workspace: PowerBiAPI.Workspace) -> None: return None - LOGGER.info("Creating scan job for workspace") - LOGGER.info("{}={}".format(Constant.WorkspaceId, workspace_id)) - LOGGER.info("Hitting URL={}".format(scan_create_endpoint)) + logger.info("Creating scan job for workspace") + logger.info("{}={}".format(Constant.WorkspaceId, workspace_id)) + logger.debug("Hitting URL={}".format(scan_create_endpoint)) scan_id = create_scan_job() - LOGGER.info("Waiting for scan to complete") + logger.info("Waiting for scan to complete") if ( wait_for_scan_to_complete( scan_id=scan_id, timeout=self.__config.scan_timeout @@ -895,7 +895,7 @@ def init_dashboard_tiles(workspace: PowerBiAPI.Workspace) -> None: # Scan is complete lets take the result scan_result = get_scan_result(scan_id=scan_id) - LOGGER.debug(f"scan result = %s", json.dumps(scan_result, indent=1)) + logger.debug(f"scan result = %s", json.dumps(scan_result, indent=1)) workspace = PowerBiAPI.Workspace( id=scan_result["id"], name=scan_result["name"],