diff --git a/README.md b/README.md index e01976c..9916214 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Supports both DataHub Core and DataHub Cloud. - Fetching metadata for any entity - Traversing the lineage graph, both upstream and downstream - Listing SQL queries associated with a dataset +- Querying schema versions and schema blame. ## Demo diff --git a/src/mcp_server_datahub/gql/entity_details.gql b/src/mcp_server_datahub/gql/entity_details.gql index cf5d619..379247d 100644 --- a/src/mcp_server_datahub/gql/entity_details.gql +++ b/src/mcp_server_datahub/gql/entity_details.gql @@ -1205,3 +1205,45 @@ query GetEntityLineage($input: SearchAcrossLineageInput!) { } } } + +query getSchemaVersionList($input: GetSchemaVersionListInput!) { + getSchemaVersionList(input: $input) { + latestVersion { + semanticVersion + semanticVersionTimestamp + versionStamp + __typename + } + semanticVersionList { + semanticVersion + semanticVersionTimestamp + versionStamp + __typename + } + __typename + } +} + + +query getSchemaBlame($input: GetSchemaBlameInput!) { + getSchemaBlame(input: $input) { + version { + semanticVersion + semanticVersionTimestamp + versionStamp + __typename + } + schemaFieldBlameList { + fieldPath + schemaFieldChange { + timestampMillis + lastSemanticVersion + lastSchemaFieldChange + versionStamp + __typename + } + __typename + } + __typename + } +} diff --git a/src/mcp_server_datahub/mcp_server.py b/src/mcp_server_datahub/mcp_server.py index 1a60d5d..12acb7d 100644 --- a/src/mcp_server_datahub/mcp_server.py +++ b/src/mcp_server_datahub/mcp_server.py @@ -313,6 +313,47 @@ def get_lineage(urn: str, upstream: bool, max_hops: int = 1) -> dict: return lineage +@mcp.tool(description="Retrieve schema versions for a given dataset URN.") +def get_schema_versions(dataset_urn: str) -> List[Dict[str, Any]]: + client = get_client() + + variables = { + "input": { + "datasetUrn": dataset_urn, + } + } + + response = _execute_graphql( + client._graph, + query=entity_details_fragment_gql, + variables=variables, + operation_name="getSchemaVersionList", + ) + + return response.get("getSchemaVersionList", []) + + +@mcp.tool(description="Use this tool to get a schema blame.") +def get_schema_blame(datasetUrn: str, version: str) -> Dict[str, Any]: + client = get_client() + + variables = { + "input": { + "datasetUrn": datasetUrn, + "version": version, + } + } + + resp = _execute_graphql( + client._graph, + query=entity_details_fragment_gql, + variables=variables, + operation_name="getSchemaBlame", + ) + + return resp.get("getSchemaBlame", {}) + + if __name__ == "__main__": import sys @@ -348,3 +389,9 @@ def _divider() -> None: _divider() print("Getting queries", urn) print(json.dumps(get_dataset_queries(urn), indent=2)) + _divider() + print("Getting schema versions", urn) + print(json.dumps(get_schema_versions(urn), indent=2)) + _divider() + print("Getting schema blame", urn) + print(json.dumps(get_schema_blame(urn, "latest"), indent=2)) diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 8af8425..a4e8326 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -10,6 +10,8 @@ get_dataset_queries, get_entity, get_lineage, + get_schema_blame, + get_schema_versions, search, with_client, ) @@ -58,6 +60,16 @@ def test_search() -> None: assert res is not None +def test_get_schema_version() -> None: + res = get_schema_versions(_test_urn) + assert res is not None + + +def test_get_schema_blame() -> None: + res = get_schema_blame(_test_urn) + assert res is not None + + if __name__ == "__main__": import pytest