Skip to content

Commit

Permalink
feat(ingest): docs - overhaul source connector docs to make it code d…
Browse files Browse the repository at this point in the history
…riven (#4798)

Co-authored-by: MugdhaHardikar-GSLab <[email protected]>
  • Loading branch information
shirshanka and MugdhaHardikar-GSLab authored May 2, 2022
1 parent 0a0db02 commit a9ad138
Show file tree
Hide file tree
Showing 206 changed files with 6,028 additions and 559 deletions.
2 changes: 1 addition & 1 deletion docs-website/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ task generateGraphQLDocumentation(type: YarnTask, dependsOn: [yarnInstall, gener
args = ['docusaurus', 'docs:generate:graphql']
}

task yarnGenerate(type: YarnTask, dependsOn: [yarnInstall, generateGraphQLDocumentation, ':metadata-ingestion:modelDocGen'] ) {
task yarnGenerate(type: YarnTask, dependsOn: [yarnInstall, generateGraphQLDocumentation, ':metadata-ingestion:modelDocGen', ':metadata-ingestion:docGen'] ) {
inputs.files(projectMdFiles)
outputs.cacheIf { true }
args = ['run', 'generate']
Expand Down
9 changes: 7 additions & 2 deletions docs-website/docusaurus.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ module.exports = {
favicon: "img/favicon.ico",
organizationName: "linkedin", // Usually your GitHub org/user name.
projectName: "datahub", // Usually your repo name.
stylesheets: ["https://fonts.googleapis.com/css2?family=Manrope:wght@400;600&display=swap"],
stylesheets: [
"https://fonts.googleapis.com/css2?family=Manrope:wght@400;600&display=swap",
],
themeConfig: {
colorMode: {
switchConfig: {
Expand Down Expand Up @@ -198,7 +200,10 @@ module.exports = {
],
],
plugins: [
["@docusaurus/plugin-ideal-image", { quality: 100, sizes: [320, 640, 1280, 1440, 1600] }],
[
"@docusaurus/plugin-ideal-image",
{ quality: 100, sizes: [320, 640, 1280, 1440, 1600] },
],
"docusaurus-plugin-sass",
[
"docusaurus-graphql-plugin",
Expand Down
21 changes: 17 additions & 4 deletions docs-website/generateDocsDir.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ function list_markdown_files(): string[] {
.trim()
.split("\n");
let all_generated_markdown_files = execSync(
"cd .. && ls docs/generated/metamodel/**/*.md"
"cd .. && ls docs/generated/**/**/*.md"
)
.toString()
.trim()
Expand Down Expand Up @@ -100,6 +100,8 @@ function list_markdown_files(): string[] {
/^datahub-kubernetes\//,
// Various other docs/directories to ignore.
/^metadata-models\/docs\//, // these are used to generate docs, so we don't want to consider them here
/^metadata-ingestion\/archived\//, // these are archived, so we don't want to consider them here
/^metadata-ingestion\/docs\/sources\//, // these are used to generate docs, so we don't want to consider them here
/^metadata-ingestion-examples\//,
/^docker\/(?!README|datahub-upgrade|airflow\/local_airflow)/, // Drop all but a few docker docs.
/^docs\/rfc\/templates\/000-template\.md$/,
Expand Down Expand Up @@ -321,6 +323,7 @@ function new_url(original: string, filepath: string): string {
const updated = path.normalize(
`${"../".repeat(up_levels + 2)}/${relation}/${original}`
);
//console.log(`Rewriting ${original} ${filepath} as ${updated}`);
return updated;
} else {
throw new Error(`unknown extension - ${original} in ${filepath}`);
Expand Down Expand Up @@ -479,12 +482,18 @@ function write_markdown_file(
): void {
const pathname = path.dirname(output_filepath);
fs.mkdirSync(pathname, { recursive: true });
fs.writeFileSync(output_filepath, contents.stringify(""));
try {
fs.writeFileSync(output_filepath, contents.stringify(""));
} catch (error) {
console.log(`Failed to write file ${output_filepath}`);
console.log(`contents = ${contents}`);
throw error;
}
}

(async function main() {
for (const filepath of markdown_files) {
// console.log("Processing:", filepath);
//console.log("Processing:", filepath);
const contents_string = fs.readFileSync(`../${filepath}`).toString();
const contents = matter(contents_string);

Expand All @@ -507,7 +516,11 @@ function write_markdown_file(
}

// Error if a doc is not accounted for in a sidebar.
const autogenerated_sidebar_directories = ["docs/generated/metamodel"];
const autogenerated_sidebar_directories = [
"docs/generated/metamodel",
"docs/generated/ingestion",
"metadata-ingestion/archived",
];
for (const filepath of markdown_files) {
if (
autogenerated_sidebar_directories.some((dir) => filepath.startsWith(dir))
Expand Down
17 changes: 9 additions & 8 deletions docs-website/sidebars.js
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,12 @@ module.exports = {
id: "metadata-ingestion/README",
},
{
Sources: list_ids_in_directory("metadata-ingestion/source_docs", {
"metadata-ingestion/source_docs/s3": "S3",
}),
Sources: [
{
type: "autogenerated",
dirName: "docs/generated/ingestion/sources", // path is relative to the docs root ('.' would mean the whole docs folder)
},
],
},
"metadata-ingestion/transformers",
{
Expand Down Expand Up @@ -108,6 +111,9 @@ module.exports = {
"docs/how/add-custom-data-platform",
"docs/platform-instances",
"docs/how/add-user-data",
"metadata-ingestion/docs/dev_guides/stateful",
"metadata-ingestion/docs/dev_guides/reporting_telemetry",
"metadata-ingestion/docs/dev_guides/sql_profiles",
],
},
],
Expand Down Expand Up @@ -212,11 +218,6 @@ module.exports = {
"docs/docker/development",
"docs/how/backup-datahub",
"docs/how/updating-datahub",
{
type: "doc",
label: "Ingesting files from S3",
id: "metadata-ingestion/source_docs/s3",
},
//"metadata-ingestion/examples/transforms/README"
//"docs/what/graph",
//"docs/what/search-index",
Expand Down
77 changes: 38 additions & 39 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,45 +63,44 @@ We use a plugin architecture so that you can install only the dependencies you a

| Plugin Name | Install Command | Provides |
|-------------------------------------------------------------------------------------|------------------------------------------------------------| ----------------------------------- |
| [file](../metadata-ingestion/source_docs/file.md) | _included by default_ | File source and sink |
| [athena](../metadata-ingestion/source_docs/athena.md) | `pip install 'acryl-datahub[athena]'` | AWS Athena source |
| [bigquery](../metadata-ingestion/source_docs/bigquery.md) | `pip install 'acryl-datahub[bigquery]'` | BigQuery source |
| [bigquery-usage](../metadata-ingestion/source_docs/bigquery.md) | `pip install 'acryl-datahub[bigquery-usage]'` | BigQuery usage statistics source |
| [datahub-lineage-file](../metadata-ingestion/source_docs/file_lineage.md) | _no additional dependencies_ | Lineage File source |
| [datahub-business-glossary](../metadata-ingestion/source_docs/business_glossary.md) | _no additional dependencies_ | Business Glossary File source |
| [dbt](../metadata-ingestion/source_docs/dbt.md) | _no additional dependencies_ | dbt source |
| [druid](../metadata-ingestion/source_docs/druid.md) | `pip install 'acryl-datahub[druid]'` | Druid Source |
| [feast-legacy](../metadata-ingestion/source_docs/feast_legacy.md) | `pip install 'acryl-datahub[feast-legacy]'` | Feast source (legacy) |
| [feast](../metadata-ingestion/source_docs/feast.md) | `pip install 'acryl-datahub[feast]'` | Feast source (0.18.0) |
| [glue](../metadata-ingestion/source_docs/glue.md) | `pip install 'acryl-datahub[glue]'` | AWS Glue source |
| [hive](../metadata-ingestion/source_docs/hive.md) | `pip install 'acryl-datahub[hive]'` | Hive source |
| [kafka](../metadata-ingestion/source_docs/kafka.md) | `pip install 'acryl-datahub[kafka]'` | Kafka source |
| [kafka-connect](../metadata-ingestion/source_docs/kafka-connect.md) | `pip install 'acryl-datahub[kafka-connect]'` | Kafka connect source |
| [ldap](../metadata-ingestion/source_docs/ldap.md) | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source |
| [looker](../metadata-ingestion/source_docs/looker.md) | `pip install 'acryl-datahub[looker]'` | Looker source |
| [lookml](../metadata-ingestion/source_docs/lookml.md) | `pip install 'acryl-datahub[lookml]'` | LookML source, requires Python 3.7+ |
| [metabase](../metadata-ingestion/source_docs/metabase.md) | `pip install 'acryl-datahub[metabase]'` | Metabase source |
| [mode](../metadata-ingestion/source_docs/mode.md) | `pip install 'acryl-datahub[mode]'` | Mode Analytics source |
| [mongodb](../metadata-ingestion/source_docs/mongodb.md) | `pip install 'acryl-datahub[mongodb]'` | MongoDB source |
| [mssql](../metadata-ingestion/source_docs/mssql.md) | `pip install 'acryl-datahub[mssql]'` | SQL Server source |
| [mysql](../metadata-ingestion/source_docs/mysql.md) | `pip install 'acryl-datahub[mysql]'` | MySQL source |
| [mariadb](../metadata-ingestion/source_docs/mariadb.md) | `pip install 'acryl-datahub[mariadb]'` | MariaDB source |
| [openapi](../metadata-ingestion/source_docs/openapi.md) | `pip install 'acryl-datahub[openapi]'` | OpenApi Source |
| [oracle](../metadata-ingestion/source_docs/oracle.md) | `pip install 'acryl-datahub[oracle]'` | Oracle source |
| [postgres](../metadata-ingestion/source_docs/postgres.md) | `pip install 'acryl-datahub[postgres]'` | Postgres source |
| [redash](../metadata-ingestion/source_docs/redash.md) | `pip install 'acryl-datahub[redash]'` | Redash source |
| [redshift](../metadata-ingestion/source_docs/redshift.md) | `pip install 'acryl-datahub[redshift]'` | Redshift source |
| [sagemaker](../metadata-ingestion/source_docs/sagemaker.md) | `pip install 'acryl-datahub[sagemaker]'` | AWS SageMaker source |
| [snowflake](../metadata-ingestion/source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake]'` | Snowflake source |
| [snowflake-usage](../metadata-ingestion/source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake-usage]'` | Snowflake usage statistics source |
| [sql-profiles](../metadata-ingestion/source_docs/sql_profiles.md) | `pip install 'acryl-datahub[sql-profiles]'` | Data profiles for SQL-based systems |
| [sqlalchemy](../metadata-ingestion/source_docs/sqlalchemy.md) | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source |
| [superset](../metadata-ingestion/source_docs/superset.md) | `pip install 'acryl-datahub[superset]'` | Superset source |
| [tableau](../metadata-ingestion/source_docs/tableau.md) | `pip install 'acryl-datahub[tableau]'` | Tableau source |
| [trino](../metadata-ingestion/source_docs/trino.md) | `pip install 'acryl-datahub[trino]'` | Trino source |
| [starburst-trino-usage](../metadata-ingestion/source_docs/trino.md) | `pip install 'acryl-datahub[starburst-trino-usage]'` | Starburst Trino usage statistics source |
| [nifi](../metadata-ingestion/source_docs/nifi.md) | `pip install 'acryl-datahub[nifi]'` | Nifi source |
| [powerbi](../metadata-ingestion/source_docs/powerbi.md) | `pip install 'acryl-datahub[powerbi]'` | Microsoft Power BI source |
| [file](./generated/ingestion/sources/file.md) | _included by default_ | File source and sink |
| [athena](./generated/ingestion/sources/athena.md) | `pip install 'acryl-datahub[athena]'` | AWS Athena source |
| [bigquery](./generated/ingestion/sources/bigquery.md) | `pip install 'acryl-datahub[bigquery]'` | BigQuery source |
| [bigquery-usage](./generated/ingestion/sources/bigquery.md#module-bigquery-usage) | `pip install 'acryl-datahub[bigquery-usage]'` | BigQuery usage statistics source |
| [datahub-lineage-file](./generated/ingestion/sources/file-based-lineage.md) | _no additional dependencies_ | Lineage File source |
| [datahub-business-glossary](./generated/ingestion/sources/business-glossary.md) | _no additional dependencies_ | Business Glossary File source |
| [dbt](./generated/ingestion/sources/dbt.md) | _no additional dependencies_ | dbt source |
| [druid](./generated/ingestion/sources/druid.md)                                     | `pip install 'acryl-datahub[druid]'`                       | Druid source                        |
| [feast-legacy](./generated/ingestion/sources/feast.md#module-feast-legacy) | `pip install 'acryl-datahub[feast-legacy]'` | Feast source (legacy) |
| [feast](./generated/ingestion/sources/feast.md) | `pip install 'acryl-datahub[feast]'` | Feast source (0.18.0) |
| [glue](./generated/ingestion/sources/glue.md) | `pip install 'acryl-datahub[glue]'` | AWS Glue source |
| [hive](./generated/ingestion/sources/hive.md) | `pip install 'acryl-datahub[hive]'` | Hive source |
| [kafka](./generated/ingestion/sources/kafka.md) | `pip install 'acryl-datahub[kafka]'` | Kafka source |
| [kafka-connect](./generated/ingestion/sources/kafka-connect.md)                     | `pip install 'acryl-datahub[kafka-connect]'`               | Kafka Connect source                |
| [ldap](./generated/ingestion/sources/ldap.md) | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source |
| [looker](./generated/ingestion/sources/looker.md) | `pip install 'acryl-datahub[looker]'` | Looker source |
| [lookml](./generated/ingestion/sources/looker.md#module-lookml) | `pip install 'acryl-datahub[lookml]'` | LookML source, requires Python 3.7+ |
| [metabase](./generated/ingestion/sources/metabase.md) | `pip install 'acryl-datahub[metabase]'` | Metabase source |
| [mode](./generated/ingestion/sources/mode.md) | `pip install 'acryl-datahub[mode]'` | Mode Analytics source |
| [mongodb](./generated/ingestion/sources/mongodb.md) | `pip install 'acryl-datahub[mongodb]'` | MongoDB source |
| [mssql](./generated/ingestion/sources/mssql.md) | `pip install 'acryl-datahub[mssql]'` | SQL Server source |
| [mysql](./generated/ingestion/sources/mysql.md) | `pip install 'acryl-datahub[mysql]'` | MySQL source |
| [mariadb](./generated/ingestion/sources/mariadb.md) | `pip install 'acryl-datahub[mariadb]'` | MariaDB source |
| [openapi](./generated/ingestion/sources/openapi.md)                                 | `pip install 'acryl-datahub[openapi]'`                     | OpenAPI source                      |
| [oracle](./generated/ingestion/sources/oracle.md) | `pip install 'acryl-datahub[oracle]'` | Oracle source |
| [postgres](./generated/ingestion/sources/postgres.md) | `pip install 'acryl-datahub[postgres]'` | Postgres source |
| [redash](./generated/ingestion/sources/redash.md) | `pip install 'acryl-datahub[redash]'` | Redash source |
| [redshift](./generated/ingestion/sources/redshift.md) | `pip install 'acryl-datahub[redshift]'` | Redshift source |
| [sagemaker](./generated/ingestion/sources/sagemaker.md) | `pip install 'acryl-datahub[sagemaker]'` | AWS SageMaker source |
| [snowflake](./generated/ingestion/sources/snowflake.md) | `pip install 'acryl-datahub[snowflake]'` | Snowflake source |
| [snowflake-usage](./generated/ingestion/sources/snowflake.md#module-snowflake-usage) | `pip install 'acryl-datahub[snowflake-usage]'` | Snowflake usage statistics source |
| [sqlalchemy](./generated/ingestion/sources/sqlalchemy.md) | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source |
| [superset](./generated/ingestion/sources/superset.md) | `pip install 'acryl-datahub[superset]'` | Superset source |
| [tableau](./generated/ingestion/sources/tableau.md) | `pip install 'acryl-datahub[tableau]'` | Tableau source |
| [trino](./generated/ingestion/sources/trino.md) | `pip install 'acryl-datahub[trino]'` | Trino source |
| [starburst-trino-usage](./generated/ingestion/sources/trino.md) | `pip install 'acryl-datahub[starburst-trino-usage]'` | Starburst Trino usage statistics source |
| [nifi](./generated/ingestion/sources/nifi.md)                                     | `pip install 'acryl-datahub[nifi]'`                        | NiFi source                         |
| [powerbi](./generated/ingestion/sources/powerbi.md) | `pip install 'acryl-datahub[powerbi]'` | Microsoft Power BI source |

### Sinks

Expand Down
Loading

0 comments on commit a9ad138

Please sign in to comment.