diff --git a/shared/bundle_analysis/comparison.py b/shared/bundle_analysis/comparison.py index ed35d82cc..efe3bc07c 100644 --- a/shared/bundle_analysis/comparison.py +++ b/shared/bundle_analysis/comparison.py @@ -13,6 +13,7 @@ BundleAnalysisReport, BundleReport, BundleRouteReport, + ModuleReport, ) from shared.bundle_analysis.storage import BundleAnalysisReportLoader from shared.django_apps.core.models import Repository @@ -77,11 +78,66 @@ class AssetChange(BaseChange): """ asset_name: str + percentage_delta: float + size_base: int + size_head: int AssetMatch = Tuple[Optional[AssetReport], Optional[AssetReport]] +class AssetComparison: + def __init__( + self, + base_asset_report: Optional[AssetReport] = None, + head_asset_report: Optional[AssetReport] = None, + ): + self.base_asset_report = base_asset_report + self.head_asset_report = head_asset_report + + @sentry_sdk.trace + def asset_change(self) -> AssetChange: + if self.base_asset_report is None: + return AssetChange( + asset_name=self.head_asset_report.name, + change_type=AssetChange.ChangeType.ADDED, + size_delta=self.head_asset_report.size, + percentage_delta=100, + size_base=0, + size_head=self.head_asset_report.size, + ) + elif self.head_asset_report is None: + return AssetChange( + asset_name=self.base_asset_report.name, + change_type=AssetChange.ChangeType.REMOVED, + size_delta=-self.base_asset_report.size, + percentage_delta=-100.0, + size_base=self.base_asset_report.size, + size_head=0, + ) + else: + size_delta = self.head_asset_report.size - self.base_asset_report.size + percentage_delta = round( + (size_delta / self.base_asset_report.size) * 100, 2 + ) + return AssetChange( + asset_name=self.head_asset_report.name, + change_type=AssetChange.ChangeType.CHANGED, + size_delta=size_delta, + percentage_delta=percentage_delta, + size_base=self.base_asset_report.size, + size_head=self.head_asset_report.size, + ) + + def contributing_modules( + self, pr_changed_files: Optional[List[str]] = None + ) -> List[ModuleReport]: + asset_report = self.head_asset_report + if asset_report is None: + return [] + return asset_report.modules(pr_changed_files) + + class BundleComparison: def __init__( self, base_bundle_report: BundleReport, head_bundle_report: BundleReport @@ -95,7 +151,7 @@ def total_size_delta(self) -> int: return head_size - base_size @sentry_sdk.trace - def asset_changes(self) -> List[AssetChange]: + def asset_comparisons(self) -> List[AssetComparison]: # this groups assets by name # there can be multiple assets with the same name and we # need to try and match them across base and head reports @@ -119,29 +175,10 @@ def asset_changes(self) -> List[AssetChange]: if asset_name not in asset_names: matches += self._match_assets(asset_reports, []) - changes = [] - for base_asset_report, head_asset_report in matches: - if base_asset_report is None: - change = AssetChange( - asset_name=head_asset_report.name, - change_type=AssetChange.ChangeType.ADDED, - size_delta=head_asset_report.size, - ) - elif head_asset_report is None: - change = AssetChange( - asset_name=base_asset_report.name, - change_type=AssetChange.ChangeType.REMOVED, - size_delta=-base_asset_report.size, - ) - else: - change = AssetChange( - asset_name=head_asset_report.name, - change_type=AssetChange.ChangeType.CHANGED, - size_delta=head_asset_report.size - base_asset_report.size, - ) - changes.append(change) - - return changes + return [ + AssetComparison(base_asset_report, head_asset_report) + for base_asset_report, head_asset_report in matches + ] def _match_assets( self, @@ -153,9 +190,11 @@ def _match_assets( This method attempts to pick the most likely matching of assets between base and head (so as to track their changes through time). - The current approach is fairly naive and just picks the asset with the - closest size. There are probably better ways of doing this that we can - improve upon in the future. + Current approach: + 1. Pick asset with the same UUID. This means the base and head assets have either of: + - same hashed name + - same modules by name + 2. Pick asset with the closest size """ n = max([len(base_asset_reports), len(head_asset_reports)]) matches: List[AssetMatch] = [] @@ -169,13 +208,24 @@ def _match_assets( # no more base assets to match against matches.append((None, head_asset_report)) else: - # try and find the most "similar" base asset - size_deltas = { - abs(head_asset_report.size - base_bundle.size): base_bundle + # 1. Pick asset with the same UUID + base_asset_report_uuids = { + base_bundle.uuid: base_bundle for base_bundle in base_asset_reports } - min_delta = min(size_deltas.keys()) - base_asset_report = size_deltas[min_delta] + if head_asset_report.uuid in base_asset_report_uuids: + base_asset_report = base_asset_report_uuids[ + head_asset_report.uuid + ] + + # 2. Pick asset with the closest size + else: + size_deltas = { + abs(head_asset_report.size - base_bundle.size): base_bundle + for base_bundle in base_asset_reports + } + min_delta = min(size_deltas.keys()) + base_asset_report = size_deltas[min_delta] matches.append((base_asset_report, head_asset_report)) base_asset_reports.remove(base_asset_report) diff --git a/shared/bundle_analysis/parsers/v3.py b/shared/bundle_analysis/parsers/v3.py index c17e3fac3..bb7a4b9ad 100644 --- a/shared/bundle_analysis/parsers/v3.py +++ b/shared/bundle_analysis/parsers/v3.py @@ -7,6 +7,7 @@ import ijson import sentry_sdk +from sqlalchemy import tuple_ from sqlalchemy.orm import Session as DbSession from sqlalchemy.orm.exc import MultipleResultsFound, NoResultFound @@ -160,6 +161,16 @@ def parse(self, path: str) -> Tuple[int, str]: # but first we need to find the Asset by the hashed file name dynamic_imports_list = self._parse_dynamic_imports() if dynamic_imports_list: + self.db_session.execute( + DynamicImport.__table__.delete().where( + tuple_(DynamicImport.chunk_id, DynamicImport.asset_id).in_( + [ + (item["chunk_id"], item["asset_id"]) + for item in dynamic_imports_list + ] + ) + ) + ) insert_dynamic_imports = DynamicImport.__table__.insert().values( dynamic_imports_list ) diff --git a/shared/bundle_analysis/report.py b/shared/bundle_analysis/report.py index 744d7b7c4..6b6e79bdc 100644 --- a/shared/bundle_analysis/report.py +++ b/shared/bundle_analysis/report.py @@ -92,16 +92,28 @@ def uuid(self) -> str: def asset_type(self) -> AssetType: return self.asset.asset_type - def modules(self) -> List[ModuleReport]: + def modules( + self, pr_changed_files: Optional[List[str]] = None + ) -> List[ModuleReport]: with get_db_session(self.db_path) as session: - modules = ( + query = ( session.query(Module) .join(Module.chunks) .join(Chunk.assets) .filter(Asset.id == self.asset.id) - .all() ) - return [ModuleReport(self.db_path, module) for module in modules] + + # Apply additional filter if pr_changed_files is provided + if pr_changed_files: + # Normalize pr_changed_files to have './' prefix for relative paths + normalized_files = { + f"./{file}" if not file.startswith("./") else file + for file in pr_changed_files + } + query = query.filter(Module.name.in_(normalized_files)) + + modules = query.all() + return [ModuleReport(self.db_path, module) for module in modules] def routes(self) -> Optional[List[str]]: plugin_name = self.bundle_info.get("plugin_name") diff --git a/tests/unit/bundle_analysis/test_bundle_comparison.py b/tests/unit/bundle_analysis/test_bundle_comparison.py index 1a5d870e9..ac381986e 100644 --- a/tests/unit/bundle_analysis/test_bundle_comparison.py +++ b/tests/unit/bundle_analysis/test_bundle_comparison.py @@ -111,49 +111,416 @@ def test_bundle_analysis_comparison(): ) bundle_comparison = comparison.bundle_comparison("sample") - asset_changes = bundle_comparison.asset_changes() - assert set(asset_changes) == set( + total_size_delta = bundle_comparison.total_size_delta() + assert total_size_delta == 1100 + assert comparison.percentage_delta == 0.73 + + with pytest.raises(MissingBundleError): + comparison.bundle_comparison("new") + + +def test_bundle_asset_comparison_using_closest_size_delta(): + loader = BundleAnalysisReportLoader( + storage_service=MemoryStorageService({}), + repo_key="testing", + ) + + comparison = BundleAnalysisComparison( + loader=loader, + base_report_key="base-report", + head_report_key="head-report", + ) + + # raises errors when either report doesn't exist in storage + with pytest.raises(MissingBaseReportError): + comparison.base_report + with pytest.raises(MissingHeadReportError): + comparison.head_report + + try: + base_report = BundleAnalysisReport() + base_report.ingest(base_report_bundle_stats_path) + + old_bundle = Bundle(name="old") + with get_db_session(base_report.db_path) as db_session: + db_session.add(old_bundle) + db_session.commit() + + head_report = BundleAnalysisReport() + head_report.ingest(head_report_bundle_stats_path) + + new_bundle = Bundle(name="new") + with get_db_session(head_report.db_path) as db_session: + db_session.add(new_bundle) + db_session.commit() + + loader.save(base_report, "base-report") + loader.save(head_report, "head-report") + finally: + base_report.cleanup() + head_report.cleanup() + + bundle_changes = comparison.bundle_changes() + assert set(bundle_changes) == set( [ - AssetChange( - asset_name="assets/other-*.svg", - change_type=AssetChange.ChangeType.ADDED, - size_delta=5126, + BundleChange( + bundle_name="sample", + change_type=BundleChange.ChangeType.CHANGED, + size_delta=1100, + percentage_delta=0.73, ), - AssetChange( - asset_name="assets/index-*.css", - change_type=AssetChange.ChangeType.CHANGED, + BundleChange( + bundle_name="new", + change_type=BundleChange.ChangeType.ADDED, size_delta=0, + percentage_delta=100, ), - AssetChange( - asset_name="assets/LazyComponent-*.js", - change_type=AssetChange.ChangeType.CHANGED, + BundleChange( + bundle_name="old", + change_type=BundleChange.ChangeType.REMOVED, size_delta=0, + percentage_delta=-100, ), - AssetChange( - asset_name="assets/index-*.js", - change_type=AssetChange.ChangeType.CHANGED, - size_delta=100, + ] + ) + + bundle_comparison = comparison.bundle_comparison("sample") + asset_comparisons = bundle_comparison.asset_comparisons() + assert len(asset_comparisons) == 6 + + asset_comparison_d = {} + for asset_comparison in asset_comparisons: + key = ( + asset_comparison.base_asset_report.hashed_name + if asset_comparison.base_asset_report + else None, + asset_comparison.head_asset_report.hashed_name + if asset_comparison.head_asset_report + else None, + ) + assert key not in asset_comparison_d + asset_comparison_d[key] = asset_comparison + + # Check asset change is correct + assert asset_comparison_d[ + ("assets/index-666d2e09.js", "assets/index-666d2e09.js") + ].asset_change() == AssetChange( + change_type=AssetChange.ChangeType.CHANGED, + size_delta=0, + asset_name="assets/index-*.js", + percentage_delta=0, + size_base=144577, + size_head=144577, + ) + assert asset_comparison_d[ + ("assets/index-c8676264.js", "assets/index-c8676264.js") + ].asset_change() == AssetChange( + change_type=AssetChange.ChangeType.CHANGED, + size_delta=100, + asset_name="assets/index-*.js", + percentage_delta=64.94, + size_base=154, + size_head=254, + ) + assert asset_comparison_d[ + (None, "assets/other-35ef61ed.svg") + ].asset_change() == AssetChange( + change_type=AssetChange.ChangeType.ADDED, + size_delta=5126, + asset_name="assets/other-*.svg", + percentage_delta=100, + size_base=0, + size_head=5126, + ) + assert asset_comparison_d[ + ("assets/index-d526a0c5.css", "assets/index-d526a0c5.css") + ].asset_change() == AssetChange( + change_type=AssetChange.ChangeType.CHANGED, + size_delta=0, + asset_name="assets/index-*.css", + percentage_delta=0, + size_base=1421, + size_head=1421, + ) + assert asset_comparison_d[ + ("assets/LazyComponent-fcbb0922.js", "assets/LazyComponent-fcbb0922.js") + ].asset_change() == AssetChange( + change_type=AssetChange.ChangeType.CHANGED, + size_delta=0, + asset_name="assets/LazyComponent-*.js", + percentage_delta=0, + size_base=294, + size_head=294, + ) + assert asset_comparison_d[ + ("assets/react-35ef61ed.svg", None) + ].asset_change() == AssetChange( + change_type=AssetChange.ChangeType.REMOVED, + size_delta=-4126, + asset_name="assets/react-*.svg", + percentage_delta=-100, + size_base=4126, + size_head=0, + ) + + # Check asset contributing modules is correct + module_reports = asset_comparison_d[ + ("assets/index-666d2e09.js", "assets/index-666d2e09.js") + ].contributing_modules() + assert [module.name for module in module_reports] == [ + "./vite/modulepreload-polyfill", + "./commonjsHelpers.js", + "../../node_modules/.pnpm/react@18.2.0/node_modules/react/jsx-runtime.js?commonjs-module", + "../../node_modules/.pnpm/react@18.2.0/node_modules/react/cjs/react-jsx-runtime.production.min.js?commonjs-exports", + "../../node_modules/.pnpm/react@18.2.0/node_modules/react/index.js?commonjs-module", + "../../node_modules/.pnpm/react@18.2.0/node_modules/react/cjs/react.production.min.js?commonjs-exports", + "../../node_modules/.pnpm/react@18.2.0/node_modules/react/cjs/react.production.min.js", + "../../node_modules/.pnpm/react@18.2.0/node_modules/react/index.js", + "../../node_modules/.pnpm/react@18.2.0/node_modules/react/cjs/react-jsx-runtime.production.min.js", + "../../node_modules/.pnpm/react@18.2.0/node_modules/react/jsx-runtime.js", + "../../node_modules/.pnpm/react-dom@18.2.0_react@18.2.0/node_modules/react-dom/client.js?commonjs-exports", + "../../node_modules/.pnpm/react-dom@18.2.0_react@18.2.0/node_modules/react-dom/index.js?commonjs-module", + "../../node_modules/.pnpm/react-dom@18.2.0_react@18.2.0/node_modules/react-dom/cjs/react-dom.production.min.js?commonjs-exports", + "../../node_modules/.pnpm/scheduler@0.23.0/node_modules/scheduler/index.js?commonjs-module", + "../../node_modules/.pnpm/scheduler@0.23.0/node_modules/scheduler/cjs/scheduler.production.min.js?commonjs-exports", + "../../node_modules/.pnpm/scheduler@0.23.0/node_modules/scheduler/cjs/scheduler.production.min.js", + "../../node_modules/.pnpm/scheduler@0.23.0/node_modules/scheduler/index.js", + "../../node_modules/.pnpm/react-dom@18.2.0_react@18.2.0/node_modules/react-dom/cjs/react-dom.production.min.js", + "../../node_modules/.pnpm/react-dom@18.2.0_react@18.2.0/node_modules/react-dom/index.js", + "../../node_modules/.pnpm/react-dom@18.2.0_react@18.2.0/node_modules/react-dom/client.js", + "./vite/preload-helper", + "./src/assets/react.svg", + "../../../../../../vite.svg", + "./src/App.css", + "./src/App.tsx", + "./src/index.css", + "./src/main.tsx", + "./index.html", + ] + module_reports = asset_comparison_d[ + ("assets/index-c8676264.js", "assets/index-c8676264.js") + ].contributing_modules() + assert [module.name for module in module_reports] == [ + "./src/IndexedLazyComponent/IndexedLazyComponent.tsx", + "./src/IndexedLazyComponent/index.ts", + "./src/Other.tsx", + ] + module_reports = asset_comparison_d[ + (None, "assets/other-35ef61ed.svg") + ].contributing_modules() + assert [module.name for module in module_reports] == [] + module_reports = asset_comparison_d[ + ("assets/index-d526a0c5.css", "assets/index-d526a0c5.css") + ].contributing_modules() + assert [module.name for module in module_reports] == [] + module_reports = asset_comparison_d[ + ("assets/LazyComponent-fcbb0922.js", "assets/LazyComponent-fcbb0922.js") + ].contributing_modules() + assert [module.name for module in module_reports] == [ + "./src/LazyComponent/LazyComponent.tsx", + ] + module_reports = asset_comparison_d[ + ("assets/react-35ef61ed.svg", None) + ].contributing_modules() + assert [module.name for module in module_reports] == [] + + # Check asset contributing modules is correct when pr_files are used for filtering + module_reports = asset_comparison_d[ + ("assets/index-666d2e09.js", "assets/index-666d2e09.js") + ].contributing_modules( + pr_changed_files=[ + "src/index.css", + "src/main.tsx", + "./index.html", + "/src/App.css", + ] # first 3 is in, 4th is not + ) + assert [module.name for module in module_reports] == [ + "./src/index.css", + "./src/main.tsx", + "./index.html", + ] + + +def test_bundle_asset_comparison_using_uuid(): + """ + In the default setup we have: + (base:index-666d2e09.js, head:index-666d2e09.js): 144577 -> 144577 + (base:index-c8676264.js, head:index-c8676264.js): 154 -> 254 + this matches based on closes size delta, now we will update to the following UUIDs + base:index-666d2e09.js -> UUID=123 + base:index-c8676264.js -> UUID=456 + head:index-666d2e09.js -> UUID=456 + head:index-c8676264.js -> UUID=123 + this will yield the following comparisons + (base:index-666d2e09.js, head:index-c8676264.js): 144577 -> 254 + (base:index-c8676264.js, head:index-666d2e09.js): 154 -> 144577 + """ + loader = BundleAnalysisReportLoader( + storage_service=MemoryStorageService({}), + repo_key="testing", + ) + + comparison = BundleAnalysisComparison( + loader=loader, + base_report_key="base-report", + head_report_key="head-report", + ) + + # raises errors when either report doesn't exist in storage + with pytest.raises(MissingBaseReportError): + comparison.base_report + with pytest.raises(MissingHeadReportError): + comparison.head_report + + try: + base_report = BundleAnalysisReport() + base_report.ingest(base_report_bundle_stats_path) + + old_bundle = Bundle(name="old") + with get_db_session(base_report.db_path) as db_session: + db_session.add(old_bundle) + db_session.commit() + + head_report = BundleAnalysisReport() + head_report.ingest(head_report_bundle_stats_path) + + new_bundle = Bundle(name="new") + with get_db_session(head_report.db_path) as db_session: + db_session.add(new_bundle) + db_session.commit() + + loader.save(base_report, "base-report") + loader.save(head_report, "head-report") + finally: + base_report.cleanup() + head_report.cleanup() + + # Update the UUIDs + with get_db_session(comparison.base_report.db_path) as db_session: + from shared.bundle_analysis.models import Asset + + db_session.query(Asset).filter(Asset.name == "assets/index-666d2e09.js").update( + {Asset.uuid: "123"}, synchronize_session="fetch" + ) + db_session.query(Asset).filter(Asset.name == "assets/index-c8676264.js").update( + {Asset.uuid: "456"}, synchronize_session="fetch" + ) + db_session.commit() + + with get_db_session(comparison.head_report.db_path) as db_session: + from shared.bundle_analysis.models import Asset + + db_session.query(Asset).filter(Asset.name == "assets/index-666d2e09.js").update( + {Asset.uuid: "456"}, synchronize_session="fetch" + ) + db_session.query(Asset).filter(Asset.name == "assets/index-c8676264.js").update( + {Asset.uuid: "123"}, synchronize_session="fetch" + ) + db_session.commit() + + bundle_changes = comparison.bundle_changes() + assert set(bundle_changes) == set( + [ + BundleChange( + bundle_name="sample", + change_type=BundleChange.ChangeType.CHANGED, + size_delta=1100, + percentage_delta=0.73, ), - AssetChange( - asset_name="assets/index-*.js", - change_type=AssetChange.ChangeType.CHANGED, + BundleChange( + bundle_name="new", + change_type=BundleChange.ChangeType.ADDED, size_delta=0, + percentage_delta=100, ), - AssetChange( - asset_name="assets/react-*.svg", - change_type=AssetChange.ChangeType.REMOVED, - size_delta=-4126, + BundleChange( + bundle_name="old", + change_type=BundleChange.ChangeType.REMOVED, + size_delta=0, + percentage_delta=-100, ), ] ) - total_size_delta = bundle_comparison.total_size_delta() - assert total_size_delta == 1100 - assert total_size_delta == sum([change.size_delta for change in asset_changes]) - assert comparison.percentage_delta == 0.73 - - with pytest.raises(MissingBundleError): - comparison.bundle_comparison("new") + bundle_comparison = comparison.bundle_comparison("sample") + asset_comparisons = bundle_comparison.asset_comparisons() + assert len(asset_comparisons) == 6 + + asset_comparison_d = {} + for asset_comparison in asset_comparisons: + key = ( + asset_comparison.base_asset_report.hashed_name + if asset_comparison.base_asset_report + else None, + asset_comparison.head_asset_report.hashed_name + if asset_comparison.head_asset_report + else None, + ) + assert key not in asset_comparison_d + asset_comparison_d[key] = asset_comparison + + # Check asset change is correct + assert asset_comparison_d[ + ("assets/index-666d2e09.js", "assets/index-c8676264.js") + ].asset_change() == AssetChange( + change_type=AssetChange.ChangeType.CHANGED, + size_delta=-144323, + asset_name="assets/index-*.js", + percentage_delta=-99.82, + size_base=144577, + size_head=254, + ) + assert asset_comparison_d[ + ("assets/index-c8676264.js", "assets/index-666d2e09.js") + ].asset_change() == AssetChange( + change_type=AssetChange.ChangeType.CHANGED, + size_delta=144423, + asset_name="assets/index-*.js", + percentage_delta=93781.17, + size_base=154, + size_head=144577, + ) + assert asset_comparison_d[ + (None, "assets/other-35ef61ed.svg") + ].asset_change() == AssetChange( + change_type=AssetChange.ChangeType.ADDED, + size_delta=5126, + asset_name="assets/other-*.svg", + percentage_delta=100, + size_base=0, + size_head=5126, + ) + assert asset_comparison_d[ + ("assets/index-d526a0c5.css", "assets/index-d526a0c5.css") + ].asset_change() == AssetChange( + change_type=AssetChange.ChangeType.CHANGED, + size_delta=0, + asset_name="assets/index-*.css", + percentage_delta=0, + size_base=1421, + size_head=1421, + ) + assert asset_comparison_d[ + ("assets/LazyComponent-fcbb0922.js", "assets/LazyComponent-fcbb0922.js") + ].asset_change() == AssetChange( + change_type=AssetChange.ChangeType.CHANGED, + size_delta=0, + asset_name="assets/LazyComponent-*.js", + percentage_delta=0, + size_base=294, + size_head=294, + ) + assert asset_comparison_d[ + ("assets/react-35ef61ed.svg", None) + ].asset_change() == AssetChange( + change_type=AssetChange.ChangeType.REMOVED, + size_delta=-4126, + asset_name="assets/react-*.svg", + percentage_delta=-100, + size_base=4126, + size_head=0, + ) def test_bundle_analysis_total_size_delta():