Skip to main content
Version: Next

Models

AccessClass

class datahub.metadata.schema_classes.AccessClass(roles = None)

Bases: _Aspect

Aspect used for associating roles to a dataset or any asset

property roles : None | List[RoleAssociationClass]

List of roles that need to be associated

AccessLevelClass

class datahub.metadata.schema_classes.AccessLevelClass()

Bases: object

The various access levels

PRIVATE = 'PRIVATE'

PUBLIC = 'PUBLIC'

ActorsClass

class datahub.metadata.schema_classes.ActorsClass(users = None)

Bases: _Aspect

Provisioned users of a role

property users : None | List[RoleUserClass]

List of provisioned users of a role

ApplicationKeyClass

class datahub.metadata.schema_classes.ApplicationKeyClass(id)

Bases: _Aspect

Key for an Application

  • Parameters:id (str)

property id : str

A unique id for the Application.

ApplicationPropertiesClass

class datahub.metadata.schema_classes.ApplicationPropertiesClass(customProperties = None, externalUrl = None, name = None, description = None)

Bases: _Aspect

The main properties of an Application

  • Parameters:
    • customProperties (Optional[Dict[str, str]])
    • externalUrl (Optional[str])
    • name (Optional[str])
    • description (Optional[str])

property customProperties : Dict[str, str]

Custom property bag.

property description : None | str

Documentation of the application

property externalUrl : None | str

URL where the reference exists

property name : None | str

Display name of the Application

ApplicationsClass

class datahub.metadata.schema_classes.ApplicationsClass(applications)

Bases: _Aspect

Links from an Asset to its Applications

  • Parameters:applications (List[str])

property applications : List[str]

The Applications attached to an Asset

ArrayTypeClass

class datahub.metadata.schema_classes.ArrayTypeClass(nestedType = None)

Bases: DictWrapper

Array field type.

  • Parameters:nestedType (Optional[List[str]])

property nestedType : None | List[str]

List of types this array holds.

AspectBag

class datahub.metadata.schema_classes.AspectBag()

Bases: TypedDict

access : AccessClass

actors : ActorsClass

applicationKey : ApplicationKeyClass

applicationProperties : ApplicationPropertiesClass

applications : ApplicationsClass

assertionActions : AssertionActionsClass

assertionInfo : AssertionInfoClass

assertionKey : AssertionKeyClass

assertionRunEvent : AssertionRunEventClass

browsePaths : BrowsePathsClass

browsePathsV2 : BrowsePathsV2Class

businessAttributeInfo : BusinessAttributeInfoClass

businessAttributeKey : BusinessAttributeKeyClass

businessAttributes : BusinessAttributesClass

chartInfo : ChartInfoClass

chartKey : ChartKeyClass

chartQuery : ChartQueryClass

chartUsageStatistics : ChartUsageStatisticsClass

container : ContainerClass

containerKey : ContainerKeyClass

containerProperties : ContainerPropertiesClass

corpGroupEditableInfo : CorpGroupEditableInfoClass

corpGroupInfo : CorpGroupInfoClass

corpGroupKey : CorpGroupKeyClass

corpUserCredentials : CorpUserCredentialsClass

corpUserEditableInfo : CorpUserEditableInfoClass

corpUserInfo : CorpUserInfoClass

corpUserKey : CorpUserKeyClass

corpUserSettings : CorpUserSettingsClass

corpUserStatus : CorpUserStatusClass

cost : CostClass

dashboardInfo : DashboardInfoClass

dashboardKey : DashboardKeyClass

dashboardUsageStatistics : DashboardUsageStatisticsClass

dataContractKey : DataContractKeyClass

dataContractProperties : DataContractPropertiesClass

dataContractStatus : DataContractStatusClass

dataFlowInfo : DataFlowInfoClass

dataFlowKey : DataFlowKeyClass

dataHubAccessTokenInfo : DataHubAccessTokenInfoClass

dataHubAccessTokenKey : DataHubAccessTokenKeyClass

dataHubActionKey : DataHubActionKeyClass

dataHubConnectionDetails : DataHubConnectionDetailsClass

dataHubConnectionKey : DataHubConnectionKeyClass

dataHubExecutionRequestInput : ExecutionRequestInputClass

dataHubExecutionRequestKey : ExecutionRequestKeyClass

dataHubExecutionRequestResult : ExecutionRequestResultClass

dataHubExecutionRequestSignal : ExecutionRequestSignalClass

dataHubIngestionSourceInfo : DataHubIngestionSourceInfoClass

dataHubIngestionSourceKey : DataHubIngestionSourceKeyClass

dataHubOpenAPISchemaKey : DataHubOpenAPISchemaKeyClass

dataHubPersonaInfo : DataHubPersonaInfoClass

dataHubPersonaKey : DataHubPersonaKeyClass

dataHubPolicyInfo : DataHubPolicyInfoClass

dataHubPolicyKey : DataHubPolicyKeyClass

dataHubRetentionConfig : DataHubRetentionConfigClass

dataHubRetentionKey : DataHubRetentionKeyClass

dataHubRoleInfo : DataHubRoleInfoClass

dataHubRoleKey : DataHubRoleKeyClass

dataHubSecretKey : DataHubSecretKeyClass

dataHubSecretValue : DataHubSecretValueClass

dataHubStepStateKey : DataHubStepStateKeyClass

dataHubStepStateProperties : DataHubStepStatePropertiesClass

dataHubUpgradeKey : DataHubUpgradeKeyClass

dataHubUpgradeRequest : DataHubUpgradeRequestClass

dataHubUpgradeResult : DataHubUpgradeResultClass

dataHubViewInfo : DataHubViewInfoClass

dataHubViewKey : DataHubViewKeyClass

dataJobInfo : DataJobInfoClass

dataJobInputOutput : DataJobInputOutputClass

dataJobKey : DataJobKeyClass

dataPlatformInfo : DataPlatformInfoClass

dataPlatformInstance : DataPlatformInstanceClass

dataPlatformInstanceKey : DataPlatformInstanceKeyClass

dataPlatformInstanceProperties : DataPlatformInstancePropertiesClass

dataPlatformKey : DataPlatformKeyClass

dataProcessInfo : DataProcessInfoClass

dataProcessInstanceInput : DataProcessInstanceInputClass

dataProcessInstanceKey : DataProcessInstanceKeyClass

dataProcessInstanceOutput : DataProcessInstanceOutputClass

dataProcessInstanceProperties : DataProcessInstancePropertiesClass

dataProcessInstanceRelationships : DataProcessInstanceRelationshipsClass

dataProcessInstanceRunEvent : DataProcessInstanceRunEventClass

dataProcessKey : DataProcessKeyClass

dataProductKey : DataProductKeyClass

dataProductProperties : DataProductPropertiesClass

dataTransformLogic : DataTransformLogicClass

dataTypeInfo : DataTypeInfoClass

dataTypeKey : DataTypeKeyClass

datahubIngestionCheckpoint : DatahubIngestionCheckpointClass

datahubIngestionRunSummary : DatahubIngestionRunSummaryClass

datasetDeprecation : DatasetDeprecationClass

datasetKey : DatasetKeyClass

datasetProfile : DatasetProfileClass

datasetProperties : DatasetPropertiesClass

datasetUpstreamLineage : DatasetUpstreamLineageClass

datasetUsageStatistics : DatasetUsageStatisticsClass

deprecation : DeprecationClass

displayProperties : DisplayPropertiesClass

documentation : DocumentationClass

domainKey : DomainKeyClass

domainProperties : DomainPropertiesClass

domains : DomainsClass

dynamicFormAssignment : DynamicFormAssignmentClass

editableChartProperties : EditableChartPropertiesClass

editableContainerProperties : EditableContainerPropertiesClass

editableDashboardProperties : EditableDashboardPropertiesClass

editableDataFlowProperties : EditableDataFlowPropertiesClass

editableDataJobProperties : EditableDataJobPropertiesClass

editableDatasetProperties : EditableDatasetPropertiesClass

editableERModelRelationshipProperties : EditableERModelRelationshipPropertiesClass

editableMlFeatureProperties : EditableMLFeaturePropertiesClass

editableMlFeatureTableProperties : EditableMLFeatureTablePropertiesClass

editableMlModelGroupProperties : EditableMLModelGroupPropertiesClass

editableMlModelProperties : EditableMLModelPropertiesClass

editableMlPrimaryKeyProperties : EditableMLPrimaryKeyPropertiesClass

editableNotebookProperties : EditableNotebookPropertiesClass

editableSchemaMetadata : EditableSchemaMetadataClass

embed : EmbedClass

entityTypeInfo : EntityTypeInfoClass

entityTypeKey : EntityTypeKeyClass

erModelRelationshipKey : ERModelRelationshipKeyClass

erModelRelationshipProperties : ERModelRelationshipPropertiesClass

formInfo : FormInfoClass

formKey : FormKeyClass

forms : FormsClass

globalSettingsInfo : GlobalSettingsInfoClass

globalSettingsKey : GlobalSettingsKeyClass

globalTags : GlobalTagsClass

glossaryNodeInfo : GlossaryNodeInfoClass

glossaryNodeKey : GlossaryNodeKeyClass

glossaryRelatedTerms : GlossaryRelatedTermsClass

glossaryTermInfo : GlossaryTermInfoClass

glossaryTermKey : GlossaryTermKeyClass

glossaryTerms : GlossaryTermsClass

groupMembership : GroupMembershipClass

icebergCatalogInfo : IcebergCatalogInfoClass

icebergWarehouseInfo : IcebergWarehouseInfoClass

incidentInfo : IncidentInfoClass

incidentKey : IncidentKeyClass

incidentSource : IncidentSourceClass

incidentsSummary : IncidentsSummaryClass

inputFields : InputFieldsClass

institutionalMemory : InstitutionalMemoryClass

intendedUse : IntendedUseClass

inviteToken : InviteTokenClass

inviteTokenKey : InviteTokenKeyClass

mlFeatureKey : MLFeatureKeyClass

mlFeatureProperties : MLFeaturePropertiesClass

mlFeatureTableKey : MLFeatureTableKeyClass

mlFeatureTableProperties : MLFeatureTablePropertiesClass

mlHyperParam : MLHyperParamClass

mlMetric : MLMetricClass

mlModelCaveatsAndRecommendations : CaveatsAndRecommendationsClass

mlModelDeploymentKey : MLModelDeploymentKeyClass

mlModelDeploymentProperties : MLModelDeploymentPropertiesClass

mlModelEthicalConsiderations : EthicalConsiderationsClass

mlModelEvaluationData : EvaluationDataClass

mlModelFactorPrompts : MLModelFactorPromptsClass

mlModelGroupKey : MLModelGroupKeyClass

mlModelGroupProperties : MLModelGroupPropertiesClass

mlModelKey : MLModelKeyClass

mlModelMetrics : MetricsClass

mlModelProperties : MLModelPropertiesClass

mlModelQuantitativeAnalyses : QuantitativeAnalysesClass

mlModelTrainingData : TrainingDataClass

mlPrimaryKeyKey : MLPrimaryKeyKeyClass

mlPrimaryKeyProperties : MLPrimaryKeyPropertiesClass

mlTrainingRunProperties : MLTrainingRunPropertiesClass

nativeGroupMembership : NativeGroupMembershipClass

notebookContent : NotebookContentClass

notebookInfo : NotebookInfoClass

notebookKey : NotebookKeyClass

operation : OperationClass

origin : OriginClass

ownership : OwnershipClass

ownershipTypeInfo : OwnershipTypeInfoClass

ownershipTypeKey : OwnershipTypeKeyClass

partitionsSummary : PartitionsSummaryClass

platformResourceInfo : PlatformResourceInfoClass

platformResourceKey : PlatformResourceKeyClass

postInfo : PostInfoClass

postKey : PostKeyClass

propertyDefinition : StructuredPropertyDefinitionClass

queryKey : QueryKeyClass

queryProperties : QueryPropertiesClass

querySubjects : QuerySubjectsClass

queryUsageStatistics : QueryUsageStatisticsClass

roleKey : RoleKeyClass

roleMembership : RoleMembershipClass

roleProperties : RolePropertiesClass

schemaFieldAliases : SchemaFieldAliasesClass

schemaFieldKey : SchemaFieldKeyClass

schemaMetadata : SchemaMetadataClass

schemafieldInfo : SchemaFieldInfoClass

siblings : SiblingsClass

slackUserInfo : SlackUserInfoClass

sourceCode : SourceCodeClass

status : StatusClass

structuredProperties : StructuredPropertiesClass

structuredPropertyKey : StructuredPropertyKeyClass

structuredPropertySettings : StructuredPropertySettingsClass

subTypes : SubTypesClass

systemMetadata : SystemMetadataClass

tagKey : TagKeyClass

tagProperties : TagPropertiesClass

telemetryClientId : TelemetryClientIdClass

telemetryKey : TelemetryKeyClass

testInfo : TestInfoClass

testKey : TestKeyClass

testResults : TestResultsClass

upstreamLineage : UpstreamLineageClass

versionInfo : VersionInfoClass

versionProperties : VersionPropertiesClass

versionSetKey : VersionSetKeyClass

versionSetProperties : VersionSetPropertiesClass

viewProperties : ViewPropertiesClass

AssertionActionClass

class datahub.metadata.schema_classes.AssertionActionClass(type)

Bases: DictWrapper

The Actions about an Assertion. In the future, we’ll likely extend this model to support additional parameters or options related to the assertion actions.

property type : str | AssertionActionTypeClass

The type of the Action

AssertionActionTypeClass

class datahub.metadata.schema_classes.AssertionActionTypeClass()

Bases: object

RAISE_INCIDENT = 'RAISE_INCIDENT'

RESOLVE_INCIDENT = 'RESOLVE_INCIDENT'

AssertionActionsClass

class datahub.metadata.schema_classes.AssertionActionsClass(onSuccess = None, onFailure = None)

Bases: _Aspect

The Actions about an Assertion

property onFailure : List[AssertionActionClass]

Actions to be executed on failed assertion run.

property onSuccess : List[AssertionActionClass]

Actions to be executed on successful assertion run.

AssertionInfoClass

class datahub.metadata.schema_classes.AssertionInfoClass(type, customProperties = None, externalUrl = None, datasetAssertion = None, freshnessAssertion = None, volumeAssertion = None, sqlAssertion = None, fieldAssertion = None, schemaAssertion = None, customAssertion = None, source = None, lastUpdated = None, description = None)

Bases: _Aspect

Information about an assertion

property customAssertion : None | CustomAssertionInfoClass

A Custom Assertion definition. This field is populated when type is CUSTOM.

property customProperties : Dict[str, str]

Custom property bag.

property datasetAssertion : None | DatasetAssertionInfoClass

A Dataset Assertion definition. This field is populated when the type is DATASET.

property description : None | str

An optional human-readable description of the assertion

property externalUrl : None | str

URL where the reference exists

property fieldAssertion : None | FieldAssertionInfoClass

A Field Assertion definition. This field is populated when the type is FIELD.

property freshnessAssertion : None | FreshnessAssertionInfoClass

A Freshness Assertion definition. This field is populated when the type is FRESHNESS.

property lastUpdated : None | AuditStampClass

The time at which the assertion was last updated and the actor who updated it. This field is only present for Native assertions updated after this field was introduced.

property schemaAssertion : None | SchemaAssertionInfoClass

A Schema Assertion definition. This field is populated when the type is DATA_SCHEMA.

property source : None | AssertionSourceClass

The source or origin of the Assertion definition.

If the source type of the Assertion is EXTERNAL, it is expected to have a corresponding dataPlatformInstance aspect detailing the platform where it was ingested from.

property sqlAssertion : None | SqlAssertionInfoClass

A SQL Assertion definition. This field is populated when the type is SQL.

property type : str | AssertionTypeClass

Type of assertion. Assertion types can evolve to span Datasets, Flows (Pipelines), Models, Features etc.

property volumeAssertion : None | VolumeAssertionInfoClass

A Volume Assertion definition. This field is populated when the type is VOLUME.

AssertionKeyClass

class datahub.metadata.schema_classes.AssertionKeyClass(assertionId)

Bases: _Aspect

Key for an Assertion

  • Parameters:assertionId (str)

property assertionId : str

Unique id for the assertion.

AssertionResultClass

class datahub.metadata.schema_classes.AssertionResultClass(type, rowCount = None, missingCount = None, unexpectedCount = None, actualAggValue = None, nativeResults = None, externalUrl = None, error = None)

Bases: DictWrapper

The result of running an assertion

  • Parameters:
    • type (Union[str, AssertionResultTypeClass])
    • rowCount (Optional[int])
    • missingCount (Optional[int])
    • unexpectedCount (Optional[int])
    • actualAggValue (Optional[float])
    • nativeResults (Optional[Dict[str, str]])
    • externalUrl (Optional[str])
    • error (Optional[AssertionResultErrorClass])

property actualAggValue : None | float

Observed aggregate value for evaluated batch

property error : None | AssertionResultErrorClass

The error object if AssertionResultType is an Error

property externalUrl : None | str

External URL where full results are available. Only present when assertion source is not native.

property missingCount : None | int

Number of rows with missing value for evaluated batch

property nativeResults : None | Dict[str, str]

Other results of evaluation

property rowCount : None | int

Number of rows for evaluated batch

property type : str | AssertionResultTypeClass

The final result, e.g. either SUCCESS, FAILURE, or ERROR.

property unexpectedCount : None | int

Number of rows with unexpected value for evaluated batch

AssertionResultErrorClass

class datahub.metadata.schema_classes.AssertionResultErrorClass(type, properties = None)

Bases: DictWrapper

An error encountered when evaluating an AssertionResult

property properties : None | Dict[str, str]

Additional metadata depending on the type of error

property type : str | AssertionResultErrorTypeClass

The type of error encountered

AssertionResultErrorTypeClass

class datahub.metadata.schema_classes.AssertionResultErrorTypeClass()

Bases: object

CUSTOM_SQL_ERROR = 'CUSTOM_SQL_ERROR'

FIELD_ASSERTION_ERROR = 'FIELD_ASSERTION_ERROR'

INSUFFICIENT_DATA = 'INSUFFICIENT_DATA'

INVALID_PARAMETERS = 'INVALID_PARAMETERS'

INVALID_SOURCE_TYPE = 'INVALID_SOURCE_TYPE'

SOURCE_CONNECTION_ERROR = 'SOURCE_CONNECTION_ERROR'

SOURCE_QUERY_FAILED = 'SOURCE_QUERY_FAILED'

UNKNOWN_ERROR = 'UNKNOWN_ERROR'

UNSUPPORTED_PLATFORM = 'UNSUPPORTED_PLATFORM'

AssertionResultTypeClass

class datahub.metadata.schema_classes.AssertionResultTypeClass()

Bases: object

The final result of evaluating an assertion, e.g. SUCCESS, FAILURE, or ERROR.

ERROR = 'ERROR'

FAILURE = 'FAILURE'

INIT = 'INIT'

SUCCESS = 'SUCCESS'

AssertionRunEventClass

class datahub.metadata.schema_classes.AssertionRunEventClass(timestampMillis, runId, asserteeUrn, status, assertionUrn, result = None, runtimeContext = None, batchSpec = None, eventGranularity = None, partitionSpec = None, messageId = None)

Bases: _Aspect

An event representing the current status of evaluating an assertion on a batch. AssertionRunEvent should be used for reporting the status of a run as an assertion evaluation progresses.

ASPECT_TYPE : ClassVar[str] = 'timeseries'

property asserteeUrn : str

property assertionUrn : str

property batchSpec : None | BatchSpecClass

Specification of the batch which this run is evaluating

property eventGranularity : None | TimeWindowSizeClass

Granularity of the event if applicable

property messageId : None | str

The optional messageId, if provided, serves as a custom user-defined unique identifier for an aspect value.

property partitionSpec : PartitionSpecClass | None

The optional partition specification.

property result : None | AssertionResultClass

Results of assertion, present if the status is COMPLETE

property runId : str

Native (platform-specific) identifier for this run

property runtimeContext : None | Dict[str, str]

Runtime parameters of evaluation

property status : str | AssertionRunStatusClass

The status of the assertion run as per this timeseries event.

property timestampMillis : int

The event timestamp field as epoch at UTC in milliseconds.

AssertionRunStatusClass

class datahub.metadata.schema_classes.AssertionRunStatusClass()

Bases: object

The lifecycle status of an assertion run.

COMPLETE = 'COMPLETE'

AssertionSourceClass

class datahub.metadata.schema_classes.AssertionSourceClass(type, created = None)

Bases: DictWrapper

The source of an assertion

property created : None | AuditStampClass

The time at which the assertion was initially created and the author who created it. This field is only present for Native assertions created after this field was introduced.

property type : str | AssertionSourceTypeClass

The type of the Assertion Source

AssertionSourceTypeClass

class datahub.metadata.schema_classes.AssertionSourceTypeClass()

Bases: object

EXTERNAL = 'EXTERNAL'

INFERRED = 'INFERRED'

NATIVE = 'NATIVE'

AssertionStdAggregationClass

class datahub.metadata.schema_classes.AssertionStdAggregationClass()

Bases: object

The function that is applied to the aggregation input (schema, rows, column values) before evaluating an operator.

COLUMNS = 'COLUMNS'

COLUMN_COUNT = 'COLUMN_COUNT'

IDENTITY = 'IDENTITY'

MAX = 'MAX'

MEAN = 'MEAN'

MEDIAN = 'MEDIAN'

MIN = 'MIN'

NULL_COUNT = 'NULL_COUNT'

NULL_PROPORTION = 'NULL_PROPORTION'

ROW_COUNT = 'ROW_COUNT'

STDDEV = 'STDDEV'

SUM = 'SUM'

UNIQUE_COUNT = 'UNIQUE_COUNT'

UNIQUE_PROPORTION = 'UNIQUE_PROPORTION'

UNIQUE_PROPOTION = 'UNIQUE_PROPOTION'

AssertionStdOperatorClass

class datahub.metadata.schema_classes.AssertionStdOperatorClass()

Bases: object

A boolean operator that is applied on the input to an assertion, after an aggregation function has been applied.

BETWEEN = 'BETWEEN'

CONTAIN = 'CONTAIN'

END_WITH = 'END_WITH'

EQUAL_TO = 'EQUAL_TO'

GREATER_THAN = 'GREATER_THAN'

GREATER_THAN_OR_EQUAL_TO = 'GREATER_THAN_OR_EQUAL_TO'

IN = 'IN'

IS_FALSE = 'IS_FALSE'

IS_TRUE = 'IS_TRUE'

LESS_THAN = 'LESS_THAN'

LESS_THAN_OR_EQUAL_TO = 'LESS_THAN_OR_EQUAL_TO'

NOT_EQUAL_TO = 'NOT_EQUAL_TO'

NOT_IN = 'NOT_IN'

NOT_NULL = 'NOT_NULL'

NULL = 'NULL'

REGEX_MATCH = 'REGEX_MATCH'

START_WITH = 'START_WITH'

AssertionStdParameterClass

class datahub.metadata.schema_classes.AssertionStdParameterClass(value, type)

Bases: DictWrapper

Single parameter for AssertionStdOperators.

property type : str | AssertionStdParameterTypeClass

The type of the parameter

property value : str

The parameter value

AssertionStdParameterTypeClass

class datahub.metadata.schema_classes.AssertionStdParameterTypeClass()

Bases: object

LIST = 'LIST'

NUMBER = 'NUMBER'

SET = 'SET'

STRING = 'STRING'

UNKNOWN = 'UNKNOWN'

AssertionStdParametersClass

class datahub.metadata.schema_classes.AssertionStdParametersClass(value = None, maxValue = None, minValue = None)

Bases: DictWrapper

Parameters for AssertionStdOperators.

property maxValue : None | AssertionStdParameterClass

The maxValue parameter of an assertion

property minValue : None | AssertionStdParameterClass

The minValue parameter of an assertion

property value : None | AssertionStdParameterClass

The value parameter of an assertion

AssertionTypeClass

class datahub.metadata.schema_classes.AssertionTypeClass()

Bases: object

CUSTOM = 'CUSTOM'

DATASET = 'DATASET'

DATA_SCHEMA = 'DATA_SCHEMA'

FIELD = 'FIELD'

FRESHNESS = 'FRESHNESS'

SQL = 'SQL'

VOLUME = 'VOLUME'

AssertionValueChangeTypeClass

class datahub.metadata.schema_classes.AssertionValueChangeTypeClass()

Bases: object

An enum to represent a type of change in an assertion value, metric, or measurement.

ABSOLUTE = 'ABSOLUTE'

PERCENTAGE = 'PERCENTAGE'

AuditStampClass

class datahub.metadata.schema_classes.AuditStampClass(time, actor, impersonator = None, message = None)

Bases: DictWrapper

Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage.

  • Parameters:
    • time (int)
    • actor (str)
    • impersonator (Optional[str])
    • message (Optional[str])

property actor : str

The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.

property impersonator : None | str

The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.

property message : None | str

Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually.

property time : int

When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent.

AzkabanJobTypeClass

class datahub.metadata.schema_classes.AzkabanJobTypeClass()

Bases: object

The various types of supported Azkaban jobs

COMMAND = 'COMMAND'

GLUE = 'GLUE'

HADOOP_JAVA = 'HADOOP_JAVA'

HADOOP_SHELL = 'HADOOP_SHELL'

HIVE = 'HIVE'

PIG = 'PIG'

SQL = 'SQL'

BaseDataClass

class datahub.metadata.schema_classes.BaseDataClass(dataset, motivation = None, preProcessing = None)

Bases: DictWrapper

BaseData record

  • Parameters:
    • dataset (str)
    • motivation (Optional[str])
    • preProcessing (Optional[List[str]])

property dataset : str

What dataset was used in the MLModel?

property motivation : None | str

Why was this dataset chosen?

property preProcessing : None | List[str]

How was the data preprocessed (e.g., tokenization of sentences, cropping of images, any filtering such as dropping images without faces)?

BatchSpecClass

class datahub.metadata.schema_classes.BatchSpecClass(customProperties = None, nativeBatchId = None, query = None, limit = None)

Bases: DictWrapper

A batch on which certain operations, e.g. data quality evaluation, are done.

  • Parameters:
    • customProperties (Optional[Dict[str, str]])
    • nativeBatchId (Optional[str])
    • query (Optional[str])
    • limit (Optional[int])

property customProperties : Dict[str, str]

Custom property bag.

property limit : None | int

Any limit to the number of rows in the batch, if applied

property nativeBatchId : None | str

The native identifier as specified by the system operating on the batch.

property query : None | str

A query that identifies a batch of data

BinaryJsonSchemaClass

class datahub.metadata.schema_classes.BinaryJsonSchemaClass(schema)

Bases: DictWrapper

Schema text of binary JSON schema.

  • Parameters:schema (str)

property schema : str

The native schema text for binary JSON file format.

BooleanTypeClass

class datahub.metadata.schema_classes.BooleanTypeClass()

Bases: DictWrapper

Boolean field type.

BrowsePathEntryClass

class datahub.metadata.schema_classes.BrowsePathEntryClass(id, urn = None)

Bases: DictWrapper

Represents a single level in an entity’s browsePathV2

  • Parameters:
    • id (str)
    • urn (Optional[str])

property id : str

The ID of the browse path entry. This is what gets stored in the index. If there’s a urn associated with this entry, id and urn will be the same.

property urn : None | str

Optional urn pointing to some entity in DataHub

BrowsePathsClass

class datahub.metadata.schema_classes.BrowsePathsClass(paths)

Bases: _Aspect

Shared aspect containing Browse Paths to be indexed for an entity.

  • Parameters:paths (List[str])

property paths : List[str]

A list of valid browse paths for the entity.

Browse paths are expected to be forward slash-separated strings. For example: ‘prod/snowflake/datasetName’

BrowsePathsV2Class

class datahub.metadata.schema_classes.BrowsePathsV2Class(path)

Bases: _Aspect

Shared aspect containing a Browse Path to be indexed for an entity.

property path : List[BrowsePathEntryClass]

A valid browse path for the entity. This field is provided by DataHub by default. This aspect is a newer version of browsePaths where we can encode more information in the path. This path is also based on containers for a given entity if it has containers.

This is stored in elasticsearch as unit-separator delimited strings and only includes platform specific folders or containers. These paths should not include high level info captured elsewhere, i.e. Platform and Environment.

BusinessAttributeAssociationClass

class datahub.metadata.schema_classes.BusinessAttributeAssociationClass(businessAttributeUrn)

Bases: DictWrapper

  • Parameters:businessAttributeUrn (str)

property businessAttributeUrn : str

Urn of the applied businessAttribute

BusinessAttributeInfoClass

class datahub.metadata.schema_classes.BusinessAttributeInfoClass(fieldPath, name, description = None, globalTags = None, glossaryTerms = None, customProperties = None, created = None, lastModified = None, deleted = None, type = None)

Bases: _Aspect

Properties associated with a BusinessAttribute

property created : AuditStampClass

An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.

property customProperties : Dict[str, str]

Custom property bag.

property deleted : None | AuditStampClass

An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.

property description : None | str

Description

property fieldPath : str

FieldPath uniquely identifying the SchemaField this metadata is associated with

property globalTags : None | GlobalTagsClass

Tags associated with the field

property glossaryTerms : None | GlossaryTermsClass

Glossary terms associated with the field

property lastModified : AuditStampClass

An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.

property name : str

Display name of the BusinessAttribute

property type : None | SchemaFieldDataTypeClass

BusinessAttributeKeyClass

class datahub.metadata.schema_classes.BusinessAttributeKeyClass(id)

Bases: _Aspect

Key for a Business Attribute

  • Parameters:id (str)

property id : str

A unique id for the Business Attribute.

BusinessAttributesClass

class datahub.metadata.schema_classes.BusinessAttributesClass(businessAttribute = None)

Bases: _Aspect

BusinessAttribute aspect used for applying it to an entity

property businessAttribute : None | BusinessAttributeAssociationClass

Business Attribute for this field.

BytesTypeClass

class datahub.metadata.schema_classes.BytesTypeClass()

Bases: DictWrapper

Bytes field type.

CalendarIntervalClass

class datahub.metadata.schema_classes.CalendarIntervalClass()

Bases: object

DAY = 'DAY'

HOUR = 'HOUR'

MINUTE = 'MINUTE'

MONTH = 'MONTH'

QUARTER = 'QUARTER'

SECOND = 'SECOND'

WEEK = 'WEEK'

YEAR = 'YEAR'

CaveatDetailsClass

class datahub.metadata.schema_classes.CaveatDetailsClass(needsFurtherTesting = None, caveatDescription = None, groupsNotRepresented = None)

Bases: DictWrapper

This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset? Are there additional recommendations for model use?

  • Parameters:
    • needsFurtherTesting (Optional[bool])
    • caveatDescription (Optional[str])
    • groupsNotRepresented (Optional[List[str]])

property caveatDescription : None | str

Caveat description. For example: Given gender classes are binary (male/not male), which we include as male/female. Further work needed to evaluate across a spectrum of genders.

property groupsNotRepresented : None | List[str]

Relevant groups that were not represented in the evaluation dataset?

property needsFurtherTesting : None | bool

Did the results suggest any further testing?

CaveatsAndRecommendationsClass

class datahub.metadata.schema_classes.CaveatsAndRecommendationsClass(caveats = None, recommendations = None, idealDatasetCharacteristics = None)

Bases: _Aspect

This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset? Are there additional recommendations for model use?

  • Parameters:
    • caveats (Optional[CaveatDetailsClass])
    • recommendations (Optional[str])
    • idealDatasetCharacteristics (Optional[List[str]])

property caveats : None | CaveatDetailsClass

This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset?

property idealDatasetCharacteristics : None | List[str]

Ideal characteristics of an evaluation dataset for this MLModel

property recommendations : None | str

Recommendations on where this MLModel should be used.

ChangeAuditStampsClass

class datahub.metadata.schema_classes.ChangeAuditStampsClass(created = None, lastModified = None, deleted = None)

Bases: DictWrapper

Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into various lifecycle stages, and who acted to move it into those lifecycle stages. The recommended best practice is to include this record in your record schema, and annotate its fields as @readOnly in your resource. See https://github.com/linkedin/rest.li/wiki/Validation-in-Rest.li#restli-validation-annotations

property created : AuditStampClass

An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.

property deleted : None | AuditStampClass

An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.

property lastModified : AuditStampClass

An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.

ChangeTypeClass

class datahub.metadata.schema_classes.ChangeTypeClass()

Bases: object

Descriptor for a change action

CREATE = 'CREATE'

CREATE_ENTITY = 'CREATE_ENTITY'

DELETE = 'DELETE'

PATCH = 'PATCH'

RESTATE = 'RESTATE'

UPDATE = 'UPDATE'

UPSERT = 'UPSERT'

ChartCellClass

class datahub.metadata.schema_classes.ChartCellClass(cellId, changeAuditStamps, cellTitle = None)

Bases: DictWrapper

Chart cell in a notebook, which will present content in chart format

property cellId : str

Unique id for the cell. This id should be globally unique for a Notebook tool even when there are multiple deployments of it. As an example, Notebook URL could be used here for QueryBook such as ‘querybook.com/notebook/773/?cellId=1234’

property cellTitle : None | str

Title of the cell

property changeAuditStamps : ChangeAuditStampsClass

Captures information about who created/last modified/deleted this Notebook cell and when

ChartInfoClass

class datahub.metadata.schema_classes.ChartInfoClass(title, description, lastModified, customProperties = None, externalUrl = None, chartUrl = None, inputs = None, inputEdges = None, type = None, access = None, lastRefreshed = None)

Bases: _Aspect

Information about a chart

  • Parameters:
    • title (str)
    • description (str)
    • lastModified (ChangeAuditStampsClass) –
    • customProperties (Optional[Dict[str, str]])
    • externalUrl (Optional[str])
    • chartUrl (Optional[str])
    • inputs (Optional[List[str]])
    • inputEdges (Optional[List[EdgeClass]]) –
    • type (Union[None, str, ChartTypeClass]) –
    • access (Union[None, str, AccessLevelClass]) –
    • lastRefreshed (Optional[int])

property access : None | str | AccessLevelClass

Access level for the chart

property chartUrl : None | str

URL for the chart. This could be used as an external link on DataHub to allow users to access/view the chart.

property customProperties : Dict[str, str]

Custom property bag.

property description : str

Detailed description about the chart

property externalUrl : None | str

URL where the reference exists

property inputEdges : None | List[EdgeClass]

Data sources for the chart

property inputs : None | List[str]

Data sources for the chart. Deprecated! Use inputEdges instead.

property lastModified : ChangeAuditStampsClass

Captures information about who created/last modified/deleted this chart and when

property lastRefreshed : None | int

The time when this chart last refreshed

property title : str

Title of the chart

property type : None | str | ChartTypeClass

Type of the chart

ChartKeyClass

class datahub.metadata.schema_classes.ChartKeyClass(dashboardTool, chartId)

Bases: _Aspect

Key for a Chart

  • Parameters:
    • dashboardTool (str)
    • chartId (str)

property chartId : str

Unique id for the chart. This id should be globally unique for a dashboarding tool even when there are multiple deployments of it. As an example, chart URL could be used here for Looker such as ‘looker.linkedin.com/looks/1234’

property dashboardTool : str

The name of the dashboard tool such as looker, redash etc.

ChartQueryClass

class datahub.metadata.schema_classes.ChartQueryClass(rawQuery, type)

Bases: _Aspect

Information for chart query which is used for getting data of the chart

property rawQuery : str

Raw query to build a chart from input datasets

property type : str | ChartQueryTypeClass

Chart query type

ChartQueryTypeClass

class datahub.metadata.schema_classes.ChartQueryTypeClass()

Bases: object

LOOKML = 'LOOKML'

SQL = 'SQL'

ChartSnapshotClass

class datahub.metadata.schema_classes.ChartSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for a specific Chart entity.

property aspects : List[ChartKeyClass | ChartInfoClass | ChartQueryClass | EditableChartPropertiesClass | OwnershipClass | StatusClass | GlobalTagsClass | BrowsePathsClass | GlossaryTermsClass | InstitutionalMemoryClass | DataPlatformInstanceClass | BrowsePathsV2Class]

The list of metadata aspects associated with the chart. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

ChartTypeClass

class datahub.metadata.schema_classes.ChartTypeClass()

Bases: object

The various types of charts

AREA = 'AREA'

BAR = 'BAR'

BOX_PLOT = 'BOX_PLOT'

COHORT = 'COHORT'

HISTOGRAM = 'HISTOGRAM'

LINE = 'LINE'

PIE = 'PIE'

SCATTER = 'SCATTER'

TABLE = 'TABLE'

TEXT = 'TEXT'

WORD_CLOUD = 'WORD_CLOUD'

ChartUsageStatisticsClass

class datahub.metadata.schema_classes.ChartUsageStatisticsClass(timestampMillis, eventGranularity = None, partitionSpec = None, messageId = None, viewsCount = None, uniqueUserCount = None, userCounts = None)

Bases: _Aspect

Experimental (Subject to breaking change) – Stats corresponding to chart’s usage.

If this aspect represents the latest snapshot of the statistics about a Chart, the eventGranularity field should be null. If this aspect represents a bucketed window of usage statistics (e.g. over a day), then the eventGranularity field should be set accordingly.

ASPECT_TYPE : ClassVar[str] = 'timeseries'

property eventGranularity : None | TimeWindowSizeClass

Granularity of the event if applicable

property messageId : None | str

The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value.

property partitionSpec : PartitionSpecClass | None

The optional partition specification.

property timestampMillis : int

The event timestamp field as epoch at UTC in milliseconds.

property uniqueUserCount : None | int

Unique user count

property userCounts : None | List[ChartUserUsageCountsClass]

Users within this bucket, with frequency counts

property viewsCount : None | int

The total number of times chart has been viewed

ChartUserUsageCountsClass

class datahub.metadata.schema_classes.ChartUserUsageCountsClass(user, viewsCount = None)

Bases: DictWrapper

Records a single user’s usage counts for a given resource

  • Parameters:
    • user (str)
    • viewsCount (Optional[int])

property user : str

The unique id of the user.

property viewsCount : None | int

The number of times the user has viewed the chart

ConditionClass

class datahub.metadata.schema_classes.ConditionClass()

Bases: object

The matching condition in a filter criterion

ANCESTORS_INCL = 'ANCESTORS_INCL'

CONTAIN = 'CONTAIN'

DESCENDANTS_INCL = 'DESCENDANTS_INCL'

END_WITH = 'END_WITH'

EQUAL = 'EQUAL'

EXISTS = 'EXISTS'

GREATER_THAN = 'GREATER_THAN'

GREATER_THAN_OR_EQUAL_TO = 'GREATER_THAN_OR_EQUAL_TO'

IEQUAL = 'IEQUAL'

IN = 'IN'

IS_NULL = 'IS_NULL'

LESS_THAN = 'LESS_THAN'

LESS_THAN_OR_EQUAL_TO = 'LESS_THAN_OR_EQUAL_TO'

START_WITH = 'START_WITH'

ConjunctiveCriterionClass

class datahub.metadata.schema_classes.ConjunctiveCriterionClass(and_)

Bases: DictWrapper

A list of criteria and’d together.

property and_ : List[CriterionClass]

A list of and criteria the filter applies to the query

ContainerClass

class datahub.metadata.schema_classes.ContainerClass(container)

Bases: _Aspect

Link from an asset to its parent container

  • Parameters:container (str)

property container : str

The parent container of an asset

ContainerKeyClass

class datahub.metadata.schema_classes.ContainerKeyClass(guid = None)

Bases: _Aspect

Key for an Asset Container

  • Parameters:guid (Optional[str])

property guid : None | str

Unique guid for container

ContainerPropertiesClass

class datahub.metadata.schema_classes.ContainerPropertiesClass(name, customProperties = None, externalUrl = None, qualifiedName = None, description = None, env = None, created = None, lastModified = None)

Bases: _Aspect

Information about an Asset Container as received from a 3rd party source system

  • Parameters:
    • name (str)
    • customProperties (Optional[Dict[str, str]])
    • externalUrl (Optional[str])
    • qualifiedName (Optional[str])
    • description (Optional[str])
    • env (Union[None, str, FabricTypeClass]) –
    • created (Optional[TimeStampClass]) –
    • lastModified (Optional[TimeStampClass]) –

property created : None | TimeStampClass

A timestamp documenting when the asset was created in the source Data Platform (not on DataHub)

property customProperties : Dict[str, str]

Custom property bag.

property description : None | str

Description of the Asset Container as it exists inside a source system

property env : None | str | FabricTypeClass

Environment for this container

property externalUrl : None | str

URL where the reference exists

property lastModified : None | TimeStampClass

A timestamp documenting when the asset was last modified in the source Data Platform (not on DataHub)

property name : str

Display name of the Asset Container

property qualifiedName : None | str

Fully-qualified name of the Container

CorpGroupEditableInfoClass

class datahub.metadata.schema_classes.CorpGroupEditableInfoClass(description = None, pictureLink = None, slack = None, email = None)

Bases: _Aspect

Group information that can be edited from UI

  • Parameters:
    • description (Optional[str])
    • pictureLink (Optional[str])
    • slack (Optional[str])
    • email (Optional[str])

property description : None | str

A description of the group

property email : None | str

Email address to contact the group

property pictureLink : str

A URL which points to a picture which the user wants to set as the photo for the group

property slack : None | str

Slack channel for the group

CorpGroupInfoClass

class datahub.metadata.schema_classes.CorpGroupInfoClass(admins, members, groups, displayName = None, email = None, description = None, slack = None, created = None)

Bases: _Aspect

Information about a Corp Group ingested from a third party source

  • Parameters:
    • admins (List[str])
    • members (List[str])
    • groups (List[str])
    • displayName (Optional[str])
    • email (Optional[str])
    • description (Optional[str])
    • slack (Optional[str])
    • created (Optional[AuditStampClass]) –

property admins : List[str]

Owners of this group. Deprecated! Replaced by Ownership aspect.

property created : None | AuditStampClass

Created Audit stamp

property description : None | str

A description of the group.

property displayName : None | str

The name of the group.

property email : None | str

email of this group

property groups : List[str]

List of groups in this group. Deprecated! This field is unused.

property members : List[str]

List of LDAP urns in this group. Deprecated! Replaced by GroupMembership aspect.

property slack : None | str

Slack channel for the group

CorpGroupKeyClass

class datahub.metadata.schema_classes.CorpGroupKeyClass(name)

Bases: _Aspect

Key for a CorpGroup

  • Parameters:name (str)

property name : str

The URL-encoded name of the AD/LDAP group. Serves as a globally unique identifier within DataHub.

CorpGroupSnapshotClass

class datahub.metadata.schema_classes.CorpGroupSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for a specific CorpGroup entity.

property aspects : List[CorpGroupKeyClass | CorpGroupInfoClass | GlobalTagsClass | StatusClass]

The list of metadata aspects associated with the CorpGroup. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

CorpUserAppearanceSettingsClass

class datahub.metadata.schema_classes.CorpUserAppearanceSettingsClass(showSimplifiedHomepage = None, showThemeV2 = None)

Bases: DictWrapper

Settings for a user around the appearance of their DataHub UI

  • Parameters:
    • showSimplifiedHomepage (Optional[bool])
    • showThemeV2 (Optional[bool])

property showSimplifiedHomepage : None | bool

Flag whether the user should see a homepage with only datasets, charts and dashboards. Intended for users who have less operational use cases for the datahub tool.

property showThemeV2 : None | bool

Flag controlling whether the V2 UI for DataHub is shown.

CorpUserCredentialsClass

class datahub.metadata.schema_classes.CorpUserCredentialsClass(salt, hashedPassword, passwordResetToken = None, passwordResetTokenExpirationTimeMillis = None)

Bases: _Aspect

Corp user credentials

  • Parameters:
    • salt (str)
    • hashedPassword (str)
    • passwordResetToken (Optional[str])
    • passwordResetTokenExpirationTimeMillis (Optional[int])

property hashedPassword : str

Hashed password generated by concatenating salt and password, then hashing

property passwordResetToken : None | str

Optional token needed to reset a user’s password. Can only be set by the admin.

property passwordResetTokenExpirationTimeMillis : None | int

When the password reset token expires.

property salt : str

Salt used to hash password

CorpUserEditableInfoClass

class datahub.metadata.schema_classes.CorpUserEditableInfoClass(aboutMe = None, teams = None, skills = None, pictureLink = None, displayName = None, title = None, platforms = None, persona = None, slack = None, phone = None, email = None, informationSources = None)

Bases: _Aspect

Linkedin corp user information that can be edited from UI

  • Parameters:
    • aboutMe (Optional[str])
    • teams (Optional[List[str]])
    • skills (Optional[List[str]])
    • pictureLink (Optional[str])
    • displayName (Optional[str])
    • title (Optional[str])
    • platforms (Optional[List[str]])
    • persona (Optional[str])
    • slack (Optional[str])
    • phone (Optional[str])
    • email (Optional[str])
    • informationSources (Optional[List[str]])

property aboutMe : None | str

About me section of the user

property displayName : None | str

DataHub-native display name

property email : None | str

Email address to contact the user

property informationSources : None | List[str]

Information sources that have been used to populate this CorpUserEditableInfo. These include platform resources, such as Slack members or Looker users. They can also refer to other semantic urns in the future.

property persona : None | str

The user’s persona type, based on their role

property phone : None | str

Phone number to contact the user

property pictureLink : str

A URL which points to a picture which the user wants to set as a profile photo

property platforms : None | List[str]

The platforms that the user commonly works with

property skills : List[str]

Skills that the user possesses e.g. Machine Learning

property slack : None | str

Slack handle for the user

property teams : List[str]

Teams that the user belongs to e.g. Metadata

property title : None | str

DataHub-native Title, e.g. ‘Software Engineer’

CorpUserInfoClass

class datahub.metadata.schema_classes.CorpUserInfoClass(active, customProperties = None, displayName = None, email = None, title = None, managerUrn = None, departmentId = None, departmentName = None, firstName = None, lastName = None, fullName = None, countryCode = None, system = None)

Bases: _Aspect

Linkedin corp user information

  • Parameters:
    • active (bool)
    • customProperties (Optional[Dict[str, str]])
    • displayName (Optional[str])
    • email (Optional[str])
    • title (Optional[str])
    • managerUrn (Optional[str])
    • departmentId (Optional[int])
    • departmentName (Optional[str])
    • firstName (Optional[str])
    • lastName (Optional[str])
    • fullName (Optional[str])
    • countryCode (Optional[str])
    • system (Optional[bool])

property active : bool

Deprecated! Use CorpUserStatus instead. Whether the corpUser is active, ref: https://iwww.corp.linkedin.com/wiki/cf/display/GTSD/Accessing+Active+Directory+via+LDAP+tools

property countryCode : None | str

two uppercase letters country code. e.g. US

property customProperties : Dict[str, str]

Custom property bag.

property departmentId : None | int

department id this user belongs to

property departmentName : None | str

department name this user belongs to

property displayName : None | str

displayName of this user, e.g. Hang Zhang (DataHQ)

property email : None | str

email address of this user

property firstName : None | str

first name of this user

property fullName : None | str

Common name of this user, format is firstName + lastName (split by a whitespace)

property lastName : None | str

last name of this user

property managerUrn : None | str

direct manager of this user

property system : bool | None

Whether the corpUser is a system user.

property title : None | str

title of this user

CorpUserKeyClass

class datahub.metadata.schema_classes.CorpUserKeyClass(username)

Bases: _Aspect

Key for a CorpUser

  • Parameters:username (str)

property username : str

The name of the AD/LDAP user.

CorpUserSettingsClass

class datahub.metadata.schema_classes.CorpUserSettingsClass(appearance, views = None, notificationSettings = None)

Bases: _Aspect

Settings that a user can customize through the datahub ui

property appearance : CorpUserAppearanceSettingsClass

Settings for a user around the appearance of their DataHub UI

property notificationSettings : None | NotificationSettingsClass

Notification settings for a user

property views : None | CorpUserViewsSettingsClass

User preferences for the Views feature.

CorpUserSnapshotClass

class datahub.metadata.schema_classes.CorpUserSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for a specific CorpUser entity.

property aspects : List[CorpUserKeyClass | CorpUserInfoClass | CorpUserEditableInfoClass | CorpUserStatusClass | GroupMembershipClass | GlobalTagsClass | StatusClass]

The list of metadata aspects associated with the CorpUser. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

CorpUserStatusClass

class datahub.metadata.schema_classes.CorpUserStatusClass(status, lastModified)

Bases: _Aspect

The status of the user, e.g. provisioned, active, suspended, etc.

property lastModified : AuditStampClass

Audit stamp containing who last modified the status and when.

property status : str

Status of the user, e.g. PROVISIONED / ACTIVE / SUSPENDED

CorpUserViewsSettingsClass

class datahub.metadata.schema_classes.CorpUserViewsSettingsClass(defaultView = None)

Bases: DictWrapper

Settings related to the ‘Views’ feature.

  • Parameters:defaultView (Optional[str])

property defaultView : None | str

The default View which is selected for the user. If none is chosen, then this value will be left blank.

CostClass

class datahub.metadata.schema_classes.CostClass(costType, cost)

Bases: _Aspect

property cost : CostCostClass

property costType : str | CostTypeClass

CostCostClass

class datahub.metadata.schema_classes.CostCostClass(fieldDiscriminator, costId = None, costCode = None)

Bases: DictWrapper

property costCode : None | str

property costId : None | float

property fieldDiscriminator : str | CostCostDiscriminatorClass

Contains the name of the field that has its value set.

CostCostDiscriminatorClass

class datahub.metadata.schema_classes.CostCostDiscriminatorClass()

Bases: object

costCode = 'costCode'

costId = 'costId'

CostTypeClass

class datahub.metadata.schema_classes.CostTypeClass()

Bases: object

Type of Cost Code

ORG_COST_TYPE = 'ORG_COST_TYPE'

CriterionClass

class datahub.metadata.schema_classes.CriterionClass(field, value, values = None, condition = None, negated = None)

Bases: DictWrapper

A criterion for matching a field with given value

  • Parameters:
    • field (str)
    • value (str)
    • values (Optional[List[str]])
    • condition (Union[str, ConditionClass, None]) –
    • negated (Optional[bool])

property condition : str | ConditionClass

The condition for the criterion, e.g. EQUAL, START_WITH

property field : str

The name of the field that the criterion refers to

property negated : bool

Whether the condition should be negated

property value : str

The value of the intended field

property values : List[str]

Values, one of which the intended field should match. Note: if values is set, the above “value” field will be ignored.

CustomAssertionInfoClass

class datahub.metadata.schema_classes.CustomAssertionInfoClass(type, entity, field = None, logic = None)

Bases: DictWrapper

Attributes that are applicable to Custom Assertions

  • Parameters:
    • type (str)
    • entity (str)
    • field (Optional[str])
    • logic (Optional[str])

property entity : str

The entity targeted by this assertion. This can support more entityTypes (e.g. dataJob) in the future.

property field : None | str

dataset schema field targeted by this assertion.

This field is expected to be provided if the assertion is on a dataset field.

property logic : None | str

property type : str

The type of custom assertion. This is how your assertion will appear categorized in DataHub UI.

DashboardInfoClass

class datahub.metadata.schema_classes.DashboardInfoClass(title, description, lastModified, customProperties = None, externalUrl = None, charts = None, chartEdges = None, datasets = None, datasetEdges = None, dashboards = None, dashboardUrl = None, access = None, lastRefreshed = None)

Bases: _Aspect

Information about a dashboard

  • Parameters:
    • title (str)
    • description (str)
    • lastModified (ChangeAuditStampsClass) –
    • customProperties (Optional[Dict[str, str]])
    • externalUrl (Optional[str])
    • charts (Optional[List[str]])
    • chartEdges (Optional[List[EdgeClass]]) –
    • datasets (Optional[List[str]])
    • datasetEdges (Optional[List[EdgeClass]]) –
    • dashboards (Optional[List[EdgeClass]]) –
    • dashboardUrl (Optional[str])
    • access (Union[None, str, AccessLevelClass]) –
    • lastRefreshed (Optional[int])

property access : None | str | AccessLevelClass

Access level for the dashboard

property chartEdges : None | List[EdgeClass]

Charts in a dashboard

property charts : List[str]

Charts in a dashboard. Deprecated! Use chartEdges instead.

property customProperties : Dict[str, str]

Custom property bag.

property dashboardUrl : None | str

URL for the dashboard. This could be used as an external link on DataHub to allow users to access/view the dashboard.

property dashboards : List[EdgeClass]

Dashboards included by this dashboard. Some dashboard entities (e.g. PowerBI Apps) can contain other dashboards.

The Edge’s sourceUrn should never be set, as it will always be the base dashboard.

property datasetEdges : None | List[EdgeClass]

Datasets consumed by a dashboard

property datasets : List[str]

Datasets consumed by a dashboard. Deprecated! Use datasetEdges instead.

property description : str

Detailed description about the dashboard

property externalUrl : None | str

URL where the reference exists

property lastModified : ChangeAuditStampsClass

Captures information about who created/last modified/deleted this dashboard and when

property lastRefreshed : None | int

The time when this dashboard last refreshed

property title : str

Title of the dashboard

DashboardKeyClass

class datahub.metadata.schema_classes.DashboardKeyClass(dashboardTool, dashboardId)

Bases: _Aspect

Key for a Dashboard

  • Parameters:
    • dashboardTool (str)
    • dashboardId (str)

property dashboardId : str

Unique id for the dashboard. This id should be globally unique for a dashboarding tool even when there are multiple deployments of it. As an example, dashboard URL could be used here for Looker such as ‘looker.linkedin.com/dashboards/1234’

property dashboardTool : str

The name of the dashboard tool such as looker, redash etc.

DashboardSnapshotClass

class datahub.metadata.schema_classes.DashboardSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for a specific Dashboard entity.

property aspects : List[DashboardKeyClass | DashboardInfoClass | EditableDashboardPropertiesClass | OwnershipClass | StatusClass | GlobalTagsClass | BrowsePathsClass | GlossaryTermsClass | InstitutionalMemoryClass | DataPlatformInstanceClass | BrowsePathsV2Class]

The list of metadata aspects associated with the dashboard. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

DashboardUsageStatisticsClass

class datahub.metadata.schema_classes.DashboardUsageStatisticsClass(timestampMillis, eventGranularity = None, partitionSpec = None, messageId = None, viewsCount = None, executionsCount = None, uniqueUserCount = None, userCounts = None, favoritesCount = None, lastViewedAt = None)

Bases: _Aspect

Experimental (Subject to breaking change) – Stats corresponding to dashboard’s usage.

If this aspect represents the latest snapshot of the statistics about a Dashboard, the eventGranularity field should be null. If this aspect represents a bucketed window of usage statistics (e.g. over a day), then the eventGranularity field should be set accordingly.

  • Parameters:
    • timestampMillis (int)
    • eventGranularity (Optional[TimeWindowSizeClass]) –
    • partitionSpec (Optional[PartitionSpecClass]) –
    • messageId (Optional[str])
    • viewsCount (Optional[int])
    • executionsCount (Optional[int])
    • uniqueUserCount (Optional[int])
    • userCounts (Optional[List[DashboardUserUsageCountsClass]]) –
    • favoritesCount (Optional[int])
    • lastViewedAt (Optional[int])

ASPECT_TYPE : ClassVar[str] = 'timeseries'

property eventGranularity : None | TimeWindowSizeClass

Granularity of the event if applicable

property executionsCount : None | int

The total number of dashboard executions (refreshes / syncs)

property favoritesCount : None | int

The total number of times that the dashboard has been favorited

property lastViewedAt : None | int

Last viewed at

This should not be set in cases where statistics are windowed.

property messageId : None | str

The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value.

property partitionSpec : PartitionSpecClass | None

The optional partition specification.

property timestampMillis : int

The event timestamp field as epoch at UTC in milliseconds.

property uniqueUserCount : None | int

Unique user count

property userCounts : None | List[DashboardUserUsageCountsClass]

Users within this bucket, with frequency counts

property viewsCount : None | int

The total number of times dashboard has been viewed

DashboardUserUsageCountsClass

class datahub.metadata.schema_classes.DashboardUserUsageCountsClass(user, viewsCount = None, executionsCount = None, usageCount = None, userEmail = None)

Bases: DictWrapper

Records a single user’s usage counts for a given resource

  • Parameters:
    • user (str)
    • viewsCount (Optional[int])
    • executionsCount (Optional[int])
    • usageCount (Optional[int])
    • userEmail (Optional[str])

property executionsCount : None | int

The number of times the user has executed (refreshed) the dashboard

property usageCount : None | int

Normalized numeric metric representing user’s dashboard usage – the number of times the user executed or viewed the dashboard.

property user : str

The unique id of the user.

property userEmail : None | str

If user_email is set, we attempt to resolve the user’s urn upon ingest

property viewsCount : None | int

The number of times the user has viewed the dashboard

DataContractKeyClass

class datahub.metadata.schema_classes.DataContractKeyClass(id)

Bases: _Aspect

Key for a Data Contract

  • Parameters:id (str)

property id : str

Unique id for the contract

DataContractPropertiesClass

class datahub.metadata.schema_classes.DataContractPropertiesClass(entity, schema = None, freshness = None, dataQuality = None, rawContract = None)

Bases: _Aspect

Information about a data contract

property dataQuality : None | List[DataQualityContractClass]

An optional set of Data Quality contracts, e.g. table and column level contract constraints.

property entity : str

The entity that this contract is associated with. Currently, we only support Dataset contracts, but in the future we may also support Data Product level contracts.

property freshness : None | List[FreshnessContractClass]

An optional set of FRESHNESS contracts. If this is a dataset contract, there will only be one.

property rawContract : None | str

YAML-formatted contract definition

property schema : None | List[SchemaContractClass]

An optional set of schema contracts. If this is a dataset contract, there will only be one.

DataContractStateClass

class datahub.metadata.schema_classes.DataContractStateClass()

Bases: object

ACTIVE = 'ACTIVE'

PENDING = 'PENDING'

DataContractStatusClass

class datahub.metadata.schema_classes.DataContractStatusClass(state, customProperties = None)

Bases: _Aspect

Information about the status of a data contract

property customProperties : Dict[str, str]

Custom property bag.

property state : str | DataContractStateClass

The latest state of the data contract

DataFlowInfoClass

class datahub.metadata.schema_classes.DataFlowInfoClass(name, customProperties = None, externalUrl = None, description = None, project = None, created = None, lastModified = None, env = None)

Bases: _Aspect

Information about a Data processing flow

  • Parameters:
    • name (str)
    • customProperties (Optional[Dict[str, str]])
    • externalUrl (Optional[str])
    • description (Optional[str])
    • project (Optional[str])
    • created (Optional[TimeStampClass]) –
    • lastModified (Optional[TimeStampClass]) –
    • env (Union[None, str, FabricTypeClass]) –

property created : None | TimeStampClass

A timestamp documenting when the asset was created in the source Data Platform (not on DataHub)

property customProperties : Dict[str, str]

Custom property bag.

property description : None | str

Flow description

property env : None | str | FabricTypeClass

Environment for this flow

property externalUrl : None | str

URL where the reference exists

property lastModified : None | TimeStampClass

A timestamp documenting when the asset was last modified in the source Data Platform (not on DataHub)

property name : str

Flow name

property project : None | str

Optional project/namespace associated with the flow

DataFlowKeyClass

class datahub.metadata.schema_classes.DataFlowKeyClass(orchestrator, flowId, cluster)

Bases: _Aspect

Key for a Data Flow

  • Parameters:
    • orchestrator (str)
    • flowId (str)
    • cluster (str)

property cluster : str

Cluster where the flow is executed

property flowId : str

Unique Identifier of the data flow

property orchestrator : str

Workflow manager like azkaban, airflow which orchestrates the flow

DataFlowSnapshotClass

class datahub.metadata.schema_classes.DataFlowSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for a specific DataFlow entity.

property aspects : List[DataFlowKeyClass | DataFlowInfoClass | EditableDataFlowPropertiesClass | OwnershipClass | StatusClass | GlobalTagsClass | BrowsePathsClass | GlossaryTermsClass | InstitutionalMemoryClass | DataPlatformInstanceClass | BrowsePathsV2Class]

The list of metadata aspects associated with the data flow. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

DataHubAccessTokenInfoClass

class datahub.metadata.schema_classes.DataHubAccessTokenInfoClass(name, actorUrn, ownerUrn, createdAt, expiresAt = None, description = None)

Bases: _Aspect

Information about a DataHub Access Token

  • Parameters:
    • name (str)
    • actorUrn (str)
    • ownerUrn (str)
    • createdAt (int)
    • expiresAt (Optional[int])
    • description (Optional[str])

property actorUrn : str

Urn of the actor to which this access token belongs.

property createdAt : int

When the token was created.

property description : None | str

Description of the token if defined.

property expiresAt : None | int

When the token expires.

property name : str

User defined name for the access token if defined.

property ownerUrn : str

Urn of the actor which created this access token.

DataHubAccessTokenKeyClass

class datahub.metadata.schema_classes.DataHubAccessTokenKeyClass(id)

Bases: _Aspect

Key for a DataHub Access Token

  • Parameters:id (str)

property id : str

Access token’s SHA-256 hashed JWT signature

DataHubActionKeyClass

class datahub.metadata.schema_classes.DataHubActionKeyClass(id)

Bases: _Aspect

Key for a DataHub Action Pipeline

  • Parameters:id (str)

property id : str

A unique id for the Action, either generated or provided

DataHubActorFilterClass

class datahub.metadata.schema_classes.DataHubActorFilterClass(users = None, groups = None, resourceOwners = None, resourceOwnersTypes = None, allUsers = None, allGroups = None, roles = None)

Bases: DictWrapper

Information used to filter DataHub actors.

  • Parameters:
    • users (Optional[List[str]])
    • groups (Optional[List[str]])
    • resourceOwners (Optional[bool])
    • resourceOwnersTypes (Optional[List[str]])
    • allUsers (Optional[bool])
    • allGroups (Optional[bool])
    • roles (Optional[List[str]])

property allGroups : bool

Whether the filter should apply to all groups.

property allUsers : bool

Whether the filter should apply to all users.

property groups : None | List[str]

A specific set of groups to apply the policy to (disjunctive)

property resourceOwners : bool

Whether the filter should return true for owners of a particular resource. Only applies to policies of type ‘Metadata’, which have a resource associated with them.

property resourceOwnersTypes : None | List[str]

Define type of ownership for the policy

property roles : None | List[str]

A specific set of roles to apply the policy to (disjunctive).

property users : None | List[str]

A specific set of users to apply the policy to (disjunctive)

DataHubConnectionDetailsClass

class datahub.metadata.schema_classes.DataHubConnectionDetailsClass(type, name = None, json = None)

Bases: _Aspect

Information about a connection to an external platform.

property json : None | DataHubJsonConnectionClass

A JSON payload containing raw connection details. This will be present if the type is JSON.

property name : None | str

Display name of the connection

property type : str | DataHubConnectionDetailsTypeClass

The type of the connection. This defines the schema / encoding of the connection details.

DataHubConnectionDetailsTypeClass

class datahub.metadata.schema_classes.DataHubConnectionDetailsTypeClass()

Bases: object

JSON = 'JSON'

DataHubConnectionKeyClass

class datahub.metadata.schema_classes.DataHubConnectionKeyClass(id)

Bases: _Aspect

Key for a Connection

  • Parameters:id (str)

property id : str

A unique identifier for the connection.

DataHubIngestionSourceConfigClass

class datahub.metadata.schema_classes.DataHubIngestionSourceConfigClass(recipe, version = None, executorId = None, debugMode = None, extraArgs = None)

Bases: DictWrapper

  • Parameters:
    • recipe (str)
    • version (Optional[str])
    • executorId (Optional[str])
    • debugMode (Optional[bool])
    • extraArgs (Optional[Dict[str, str]])

property debugMode : None | bool

Whether or not to run this ingestion source in debug mode

property executorId : None | str

The id of the executor to use to execute the ingestion run

property extraArgs : None | Dict[str, str]

Extra arguments for the ingestion run.

property recipe : str

The JSON recipe to use for ingestion

property version : None | str

The PyPI version of the datahub CLI to use when executing a recipe

DataHubIngestionSourceInfoClass

class datahub.metadata.schema_classes.DataHubIngestionSourceInfoClass(name, type, config, platform = None, schedule = None, source = None)

Bases: _Aspect

Info about a DataHub ingestion source

property config : DataHubIngestionSourceConfigClass

Parameters associated with the Ingestion Source

property name : str

The display name of the ingestion source

property platform : None | str

Data Platform URN associated with the source

property schedule : None | DataHubIngestionSourceScheduleClass

The schedule on which the ingestion source is executed

property source : None | DataHubIngestionSourceSourceClass

The source or origin of the Ingestion Source

Currently CLI and UI do not provide an explicit source.

property type : str

The type of the source itself, e.g. mysql, bigquery, bigquery-usage. Should match the recipe.

DataHubIngestionSourceKeyClass

class datahub.metadata.schema_classes.DataHubIngestionSourceKeyClass(id)

Bases: _Aspect

Key for a DataHub ingestion source

  • Parameters:id (str)

property id : str

A unique id for the Ingestion Source, either generated or provided

DataHubIngestionSourceScheduleClass

class datahub.metadata.schema_classes.DataHubIngestionSourceScheduleClass(interval, timezone)

Bases: DictWrapper

The schedule associated with an ingestion source.

  • Parameters:
    • interval (str)
    • timezone (str)

property interval : str

A cron-formatted execution interval, as a cron string, e.g. * * * * *

property timezone : str

Timezone in which the cron interval applies, e.g. America/Los_Angeles

DataHubIngestionSourceSourceClass

class datahub.metadata.schema_classes.DataHubIngestionSourceSourceClass(type)

Bases: DictWrapper

property type : str | DataHubIngestionSourceSourceTypeClass

The source type of the ingestion source

DataHubIngestionSourceSourceTypeClass

class datahub.metadata.schema_classes.DataHubIngestionSourceSourceTypeClass()

Bases: object

SYSTEM = 'SYSTEM'

DataHubJsonConnectionClass

class datahub.metadata.schema_classes.DataHubJsonConnectionClass(encryptedBlob)

Bases: DictWrapper

A set of connection details consisting of an encrypted JSON blob.

  • Parameters:encryptedBlob (str)

property encryptedBlob : str

The encrypted JSON connection details.

DataHubOpenAPISchemaKeyClass

class datahub.metadata.schema_classes.DataHubOpenAPISchemaKeyClass(id)

Bases: _Aspect

Key for a DataHub OpenAPI schema

  • Parameters:id (str)

property id : str

A unique id for the DataHub OpenAPI schema.

DataHubPersonaInfoClass

class datahub.metadata.schema_classes.DataHubPersonaInfoClass()

Bases: _Aspect

Placeholder aspect for persona type info

DataHubPersonaKeyClass

class datahub.metadata.schema_classes.DataHubPersonaKeyClass(id)

Bases: _Aspect

Key for a persona type

  • Parameters:id (str)

property id : str

A unique id for the persona type

DataHubPolicyInfoClass

class datahub.metadata.schema_classes.DataHubPolicyInfoClass(displayName, description, type, state, privileges, actors, resources = None, editable = None, lastUpdatedTimestamp = None)

Bases: _Aspect

Information about a DataHub (UI) access policy.

property actors : DataHubActorFilterClass

The actors that the policy applies to.

property description : str

Description of the Policy

property displayName : str

Display name of the Policy

property editable : bool

Whether the policy should be editable via the UI

property lastUpdatedTimestamp : None | int

Timestamp when the policy was last updated

property privileges : List[str]

The privileges that the policy grants.

property resources : None | DataHubResourceFilterClass

The resource that the policy applies to. Not required for some ‘Platform’ privileges.

property state : str

The state of policy, ACTIVE or INACTIVE

property type : str

The type of policy

DataHubPolicyKeyClass

class datahub.metadata.schema_classes.DataHubPolicyKeyClass(id)

Bases: _Aspect

Key for a DataHub Policy

  • Parameters:id (str)

property id : str

A unique id for the DataHub access policy record. Generated on the server side at policy creation time.

DataHubPolicySnapshotClass

class datahub.metadata.schema_classes.DataHubPolicySnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for DataHub Access Policy data.

property aspects : List[DataHubPolicyKeyClass | DataHubPolicyInfoClass]

The list of metadata aspects associated with the DataHub access policy.

property urn : str

URN for the entity the metadata snapshot is associated with.

DataHubResourceFilterClass

class datahub.metadata.schema_classes.DataHubResourceFilterClass(type = None, resources = None, allResources = None, filter = None, privilegeConstraints = None)

Bases: DictWrapper

Information used to filter DataHub resource.

property allResources : bool

Whether the policy should be applied to all assets matching the filter.

property filter : None | PolicyMatchFilterClass

Filter to apply privileges to

property privilegeConstraints : None | PolicyMatchFilterClass

Constraints around what sub-resources operations are allowed to modify, i.e. NOT_EQUALS - cannot modify a particular defined tag, EQUALS - can only modify a particular defined tag, STARTS_WITH - can only modify a tag starting with xyz

property resources : None | List[str]

A specific set of resources to apply the policy to, e.g. asset urns

property type : None | str

The type of resource that the policy applies to. This will most often be a data asset entity name, for example ‘dataset’. It is not strictly required because in the future we will want to support filtering a resource by domain, as well.

DataHubRetentionConfigClass

class datahub.metadata.schema_classes.DataHubRetentionConfigClass(retention)

Bases: _Aspect

property retention : RetentionClass

DataHubRetentionKeyClass

class datahub.metadata.schema_classes.DataHubRetentionKeyClass(entityName, aspectName)

Bases: _Aspect

Key for a DataHub Retention

  • Parameters:
    • entityName (str)
    • aspectName (str)

property aspectName : str

Aspect name to apply retention to. * (or empty) for applying defaults.

property entityName : str

Entity name to apply retention to. * (or empty) for applying defaults.

DataHubRetentionSnapshotClass

class datahub.metadata.schema_classes.DataHubRetentionSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for DataHub Retention data.

property aspects : List[DataHubRetentionKeyClass | DataHubRetentionConfigClass]

The list of metadata aspects associated with the DataHub retention.

property urn : str

URN for the entity the metadata snapshot is associated with.

DataHubRoleInfoClass

class datahub.metadata.schema_classes.DataHubRoleInfoClass(name, description, editable = None)

Bases: _Aspect

Information about a DataHub Role.

  • Parameters:
    • name (str)
    • description (str)
    • editable (Optional[bool])

property description : str

Description of the Role

property editable : bool

Whether the role should be editable via the UI

property name : str

Name of the Role

DataHubRoleKeyClass

class datahub.metadata.schema_classes.DataHubRoleKeyClass(id)

Bases: _Aspect

Key for a DataHub Role

  • Parameters:id (str)

property id : str

A unique id for the DataHub role record. Generated on the server side at role creation time.

DataHubSearchConfigClass

class datahub.metadata.schema_classes.DataHubSearchConfigClass(fieldName = None, fieldType = None, queryByDefault = None, enableAutocomplete = None, addToFilters = None, addHasValuesToFilters = None, filterNameOverride = None, hasValuesFilterNameOverride = None, boostScore = None, hasValuesFieldName = None, numValuesFieldName = None, weightsPerFieldValue = None, fieldNameAliases = None)

Bases: DictWrapper

Configuration for how any given field should be indexed and matched in the DataHub search index.

  • Parameters:
    • fieldName (Optional[str])
    • fieldType (Union[None, str, SearchFieldTypeClass])
    • queryByDefault (Optional[bool])
    • enableAutocomplete (Optional[bool])
    • addToFilters (Optional[bool])
    • addHasValuesToFilters (Optional[bool])
    • filterNameOverride (Optional[str])
    • hasValuesFilterNameOverride (Optional[str])
    • boostScore (Optional[float])
    • hasValuesFieldName (Optional[str])
    • numValuesFieldName (Optional[str])
    • weightsPerFieldValue (Optional[Dict[str, float]])
    • fieldNameAliases (Optional[List[str]])

property addHasValuesToFilters : bool

Whether or not to add the “has values” filter to filters. TODO: check whether this is conditional on addToFilters being true.

property addToFilters : bool

Whether or not to add field to filters.

property boostScore : float

Boost multiplier to the match score. Matches on fields with a higher boost score rank higher.

property enableAutocomplete : bool

Whether we should use the field for default autocomplete

property fieldName : None | str

Name of the field in the search index. Defaults to the field name otherwise

property fieldNameAliases : None | List[str]

(Optional) Aliases for this given field that can be used for sorting etc.

property fieldType : None | str | SearchFieldTypeClass

Type of the field. Defines how the field is indexed and matched

property filterNameOverride : None | str

Display name of the filter

property hasValuesFieldName : None | str

If set, add an index field of the given name that checks whether the field exists

property hasValuesFilterNameOverride : None | str

Display name of the has values filter

property numValuesFieldName : None | str

If set, add an index field of the given name that checks the number of elements

property queryByDefault : bool

Whether we should match the field for the default search query

property weightsPerFieldValue : None | Dict[str, float]

(Optional) Weights to apply to score for a given value

DataHubSecretKeyClass

class datahub.metadata.schema_classes.DataHubSecretKeyClass(id)

Bases: _Aspect

Key for a DataHub Secret

  • Parameters:id (str)

property id : str

A unique id for the Secret

DataHubSecretValueClass

class datahub.metadata.schema_classes.DataHubSecretValueClass(name, value, description = None, created = None)

Bases: _Aspect

The value of a DataHub Secret

  • Parameters:
    • name (str)
    • value (str)
    • description (Optional[str])
    • created (Optional[AuditStampClass])

property created : None | AuditStampClass

Created Audit stamp

property description : None | str

Description of the secret

property name : str

The display name for the secret

property value : str

The AES-encrypted value of the DataHub secret.

DataHubStepStateKeyClass

class datahub.metadata.schema_classes.DataHubStepStateKeyClass(id)

Bases: _Aspect

Key for a DataHub Step State

  • Parameters:id (str)

property id : str

A unique id for the state

DataHubStepStatePropertiesClass

class datahub.metadata.schema_classes.DataHubStepStatePropertiesClass(lastModified, properties = None)

Bases: _Aspect

The properties associated with a DataHub step state

  • Parameters:
    • lastModified (AuditStampClass)
    • properties (Optional[Dict[str, str]])

property lastModified : AuditStampClass

Audit stamp describing the last person to update it.

property properties : Dict[str, str]

Custom properties associated with the step state

DataHubUpgradeKeyClass

class datahub.metadata.schema_classes.DataHubUpgradeKeyClass(id)

Bases: _Aspect

Key for a DataHubUpgrade

  • Parameters:id (str)

property id : str

DataHubUpgradeRequestClass

class datahub.metadata.schema_classes.DataHubUpgradeRequestClass(timestampMs, version)

Bases: _Aspect

Information collected when kicking off a DataHubUpgrade

  • Parameters:
    • timestampMs (int)
    • version (str)

property timestampMs : int

Timestamp when we started this DataHubUpgrade

property version : str

Version of this upgrade

DataHubUpgradeResultClass

class datahub.metadata.schema_classes.DataHubUpgradeResultClass(timestampMs, state = None, result = None)

Bases: _Aspect

Information collected when a DataHubUpgrade successfully finishes

property result : None | Dict[str, str]

Result map to place helpful information about this upgrade job

property state : str | DataHubUpgradeStateClass | None

Upgrade state UpgradeResult.Result

property timestampMs : int

Timestamp when we started this DataHubUpgrade

DataHubUpgradeStateClass

class datahub.metadata.schema_classes.DataHubUpgradeStateClass()

Bases: object

ABORTED = 'ABORTED'

FAILED = 'FAILED'

IN_PROGRESS = 'IN_PROGRESS'

SUCCEEDED = 'SUCCEEDED'

DataHubViewDefinitionClass

class datahub.metadata.schema_classes.DataHubViewDefinitionClass(entityTypes, filter)

Bases: DictWrapper

A View definition.

  • Parameters:
    • entityTypes (List[str])
    • filter (FilterClass)

property entityTypes : List[str]

The Entity Types in the scope of the View.

property filter : FilterClass

The filter criteria, which represents the view itself

DataHubViewInfoClass

class datahub.metadata.schema_classes.DataHubViewInfoClass(name, type, definition, created, lastModified, description = None)

Bases: _Aspect

Information about a DataHub View. – TODO: Understand whether an entity type filter is required.

property created : AuditStampClass

Audit stamp capturing the time and actor who created the View.

property definition : DataHubViewDefinitionClass

The view itself

property description : None | str

Description of the view

property lastModified : AuditStampClass

Audit stamp capturing the time and actor who last modified the View.

property name : str

The name of the View

property type : str | DataHubViewTypeClass

The type of View

DataHubViewKeyClass

class datahub.metadata.schema_classes.DataHubViewKeyClass(id)

Bases: _Aspect

Key for a DataHub View

  • Parameters:id (str)

property id : str

A unique id for the View

DataHubViewTypeClass

class datahub.metadata.schema_classes.DataHubViewTypeClass()

Bases: object

GLOBAL = 'GLOBAL'

PERSONAL = 'PERSONAL'

DataJobInfoClass

class datahub.metadata.schema_classes.DataJobInfoClass(name, type, customProperties = None, externalUrl = None, description = None, flowUrn = None, created = None, lastModified = None, status = None, env = None)

Bases: _Aspect

Information about a Data processing job

property created : None | TimeStampClass

A timestamp documenting when the asset was created in the source Data Platform (not on DataHub)

property customProperties : Dict[str, str]

Custom property bag.

property description : None | str

Job description

property env : None | str | FabricTypeClass

Environment for this job

property externalUrl : None | str

URL where the reference exists

property flowUrn : None | str

DataFlow urn that this job is part of

property lastModified : None | TimeStampClass

A timestamp documenting when the asset was last modified in the source Data Platform (not on DataHub)

property name : str

Job name

property status : None | str | JobStatusClass

Status of the job - Deprecated for Data Process Instance model.

property type : str | AzkabanJobTypeClass

Datajob type. **NOTE**: AzkabanJobType is deprecated. Please use strings instead.

DataJobInputOutputClass

class datahub.metadata.schema_classes.DataJobInputOutputClass(inputDatasets, outputDatasets, inputDatasetEdges = None, outputDatasetEdges = None, inputDatajobs = None, inputDatajobEdges = None, inputDatasetFields = None, outputDatasetFields = None, fineGrainedLineages = None)

Bases: _Aspect

Information about the inputs and outputs of a Data processing job

  • Parameters:
    • inputDatasets (List[str])
    • outputDatasets (List[str])
    • inputDatasetEdges (Optional[List[EdgeClass]])
    • outputDatasetEdges (Optional[List[EdgeClass]])
    • inputDatajobs (Optional[List[str]])
    • inputDatajobEdges (Optional[List[EdgeClass]])
    • inputDatasetFields (Optional[List[str]])
    • outputDatasetFields (Optional[List[str]])
    • fineGrainedLineages (Optional[List[FineGrainedLineageClass]])

property fineGrainedLineages : None | List[FineGrainedLineageClass]

Fine-grained column-level lineages. Not currently supported in the UI. Use the UpstreamLineage aspect for datasets to express Column Level Lineage for the UI.

property inputDatajobEdges : None | List[EdgeClass]

Input datajobs that this data job depends on

property inputDatajobs : None | List[str]

Input datajobs that this data job depends on. Deprecated! Use inputDatajobEdges instead.

property inputDatasetEdges : None | List[EdgeClass]

Input datasets consumed by the data job during processing

property inputDatasetFields : None | List[str]

Fields of the input datasets used by this job

property inputDatasets : List[str]

Input datasets consumed by the data job during processing. Deprecated! Use inputDatasetEdges instead.

property outputDatasetEdges : None | List[EdgeClass]

Output datasets produced by the data job during processing

property outputDatasetFields : None | List[str]

Fields of the output datasets this job writes to

property outputDatasets : List[str]

Output datasets produced by the data job during processing. Deprecated! Use outputDatasetEdges instead.

DataJobKeyClass

class datahub.metadata.schema_classes.DataJobKeyClass(flow, jobId)

Bases: _Aspect

Key for a Data Job

  • Parameters:
    • flow (str)
    • jobId (str)

property flow : str

Standardized data processing flow urn representing the flow for the job

property jobId : str

Unique Identifier of the data job

DataJobSnapshotClass

class datahub.metadata.schema_classes.DataJobSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for a specific DataJob entity.

property aspects : List[DataJobKeyClass | DataJobInfoClass | DataJobInputOutputClass | EditableDataJobPropertiesClass | OwnershipClass | StatusClass | GlobalTagsClass | BrowsePathsClass | GlossaryTermsClass | InstitutionalMemoryClass | DataPlatformInstanceClass | BrowsePathsV2Class]

The list of metadata aspects associated with the data job. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

DataPlatformInfoClass

class datahub.metadata.schema_classes.DataPlatformInfoClass(name, type, datasetNameDelimiter, displayName = None, logoUrl = None)

Bases: _Aspect

Information about a data platform

  • Parameters:
    • name (str)
    • type (Union[str, PlatformTypeClass])
    • datasetNameDelimiter (str)
    • displayName (Optional[str])
    • logoUrl (Optional[str])

property datasetNameDelimiter : str

The delimiter in the dataset names on the data platform, e.g. ‘/’ for HDFS and ‘.’ for Oracle

property displayName : None | str

The name that will be used for displaying a platform type.

property logoUrl : None | str

The URL for a logo associated with the platform

property name : str

Name of the data platform

property type : str | PlatformTypeClass

Platform type this data platform describes

DataPlatformInstanceClass

class datahub.metadata.schema_classes.DataPlatformInstanceClass(platform, instance = None)

Bases: _Aspect

The specific instance of the data platform that this entity belongs to

  • Parameters:
    • platform (str)
    • instance (Optional[str])

property instance : None | str

Instance of the data platform (e.g. db instance)

property platform : str

Data Platform

DataPlatformInstanceKeyClass

class datahub.metadata.schema_classes.DataPlatformInstanceKeyClass(platform, instance)

Bases: _Aspect

Key for a Data Platform Instance

  • Parameters:
    • platform (str)
    • instance (str)

property instance : str

Unique instance id

property platform : str

Data platform urn associated with the instance

DataPlatformInstancePropertiesClass

class datahub.metadata.schema_classes.DataPlatformInstancePropertiesClass(customProperties = None, externalUrl = None, name = None, description = None)

Bases: _Aspect

Properties associated with a Data Platform Instance

  • Parameters:
    • customProperties (Optional[Dict[str, str]])
    • externalUrl (Optional[str])
    • name (Optional[str])
    • description (Optional[str])

property customProperties : Dict[str, str]

Custom property bag.

property description : None | str

Documentation of the Data Platform Instance

property externalUrl : None | str

URL where the reference exists

property name : None | str

Display name of the Data Platform Instance

DataPlatformKeyClass

class datahub.metadata.schema_classes.DataPlatformKeyClass(platformName)

Bases: _Aspect

Key for a Data Platform

  • Parameters:platformName (str)

property platformName : str

Data platform name, e.g. hdfs, oracle, espresso

DataPlatformSnapshotClass

class datahub.metadata.schema_classes.DataPlatformSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for a specific dataplatform entity.

property aspects : List[DataPlatformKeyClass | DataPlatformInfoClass]

The list of metadata aspects associated with the data platform. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

DataProcessInfoClass

class datahub.metadata.schema_classes.DataProcessInfoClass(inputs = None, outputs = None)

Bases: _Aspect

The inputs and outputs of this data process

  • Parameters:
    • inputs (Optional[List[str]])
    • outputs (Optional[List[str]])

property inputs : None | List[str]

the inputs of the data process

property outputs : None | List[str]

the outputs of the data process

DataProcessInstanceInputClass

class datahub.metadata.schema_classes.DataProcessInstanceInputClass(inputs, inputEdges = None)

Bases: _Aspect

Information about the input datasets of a Data process

  • Parameters:
    • inputs (List[str])
    • inputEdges (Optional[List[EdgeClass]])

property inputEdges : None | List[EdgeClass]

Input assets consumed by the data process instance, with additional metadata. Counts as lineage. Will eventually deprecate the inputs field.

property inputs : List[str]

Input assets consumed

DataProcessInstanceKeyClass

class datahub.metadata.schema_classes.DataProcessInstanceKeyClass(id)

Bases: _Aspect

Key for an Asset DataProcessInstance

  • Parameters:id (str)

property id : str

A unique id for the DataProcessInstance. Should be separate from the name used for displaying a DataProcessInstance.

DataProcessInstanceOutputClass

class datahub.metadata.schema_classes.DataProcessInstanceOutputClass(outputs, outputEdges = None)

Bases: _Aspect

Information about the outputs of a Data process

  • Parameters:
    • outputs (List[str])
    • outputEdges (Optional[List[EdgeClass]])

property outputEdges : None | List[EdgeClass]

Output assets produced by the data process instance during processing, with additional metadata. Counts as lineage. Will eventually deprecate the outputs field.

property outputs : List[str]

Output assets produced

DataProcessInstancePropertiesClass

class datahub.metadata.schema_classes.DataProcessInstancePropertiesClass(name, created, customProperties = None, externalUrl = None, type = None)

Bases: _Aspect

The properties of a data process instance

property created : AuditStampClass

Audit stamp containing who reported the lineage and when

property customProperties : Dict[str, str]

Custom property bag.

property externalUrl : None | str

URL where the reference exists

property name : str

Process name

property type : None | str | DataProcessTypeClass

Process type

DataProcessInstanceRelationshipsClass

class datahub.metadata.schema_classes.DataProcessInstanceRelationshipsClass(upstreamInstances, parentTemplate = None, parentInstance = None)

Bases: _Aspect

Information about Data process relationships

  • Parameters:
    • upstreamInstances (List[str])
    • parentTemplate (Optional[str])
    • parentInstance (Optional[str])

property parentInstance : None | str

The parent DataProcessInstance to which it belongs. If it is an Airflow Task, then it should also belong to an Airflow Dag run, which will be another DataProcessInstance.

property parentTemplate : None | str

The parent entity whose run instance it is

property upstreamInstances : List[str]

Input DataProcessInstance which triggered this dataprocess instance

DataProcessInstanceRunEventClass

class datahub.metadata.schema_classes.DataProcessInstanceRunEventClass(timestampMillis, status, eventGranularity = None, partitionSpec = None, messageId = None, externalUrl = None, attempt = None, result = None, durationMillis = None)

Bases: _Aspect

An event representing the current status of a data process run. DataProcessRunEvent should be used for reporting the status of a data process run.

ASPECT_TYPE : ClassVar[str] = 'timeseries'

property attempt : None | int

Return the try number that this Instance Run is in

property durationMillis : None | int

The duration of the run in milliseconds.

property eventGranularity : None | TimeWindowSizeClass

Granularity of the event if applicable

property externalUrl : None | str

URL where the reference exists

property messageId : None | str

The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value.

property partitionSpec : PartitionSpecClass | None

The optional partition specification.

property result : None | DataProcessInstanceRunResultClass

The final result of the Data Processing run.

property status : str | DataProcessRunStatusClass

property timestampMillis : int

The event timestamp field as epoch at UTC in milliseconds.

DataProcessInstanceRunResultClass

class datahub.metadata.schema_classes.DataProcessInstanceRunResultClass(type, nativeResultType)

Bases: DictWrapper

property nativeResultType : str

It identifies the system where the native result comes from, such as Airflow, Azkaban, etc.

property type : str | RunResultTypeClass

The final result, e.g. SUCCESS, FAILURE, SKIPPED, or UP_FOR_RETRY.

DataProcessKeyClass

class datahub.metadata.schema_classes.DataProcessKeyClass(name, orchestrator, origin)

Bases: _Aspect

Key for a Data Process

  • Parameters:
    • name (str)
    • orchestrator (str)
    • origin (Union[str, FabricTypeClass])

property name : str

Process name i.e. an ETL job name

property orchestrator : str

Standardized Orchestrator where data process is defined. TODO: Migrate towards something that can be validated like DataPlatform urn

property origin : str | FabricTypeClass

Fabric type to which the dataset belongs or where it was generated.

DataProcessRunStatusClass

class datahub.metadata.schema_classes.DataProcessRunStatusClass()

Bases: object

COMPLETE = 'COMPLETE'

STARTED = 'STARTED'

DataProcessSnapshotClass

class datahub.metadata.schema_classes.DataProcessSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for a specific Data process entity.

property aspects : List[DataProcessKeyClass | OwnershipClass | DataProcessInfoClass | StatusClass]

The list of metadata aspects associated with the data process. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

DataProcessTypeClass

class datahub.metadata.schema_classes.DataProcessTypeClass()

Bases: object

BATCH_AD_HOC = 'BATCH_AD_HOC'

BATCH_SCHEDULED = 'BATCH_SCHEDULED'

STREAMING = 'STREAMING'

DataProductAssociationClass

class datahub.metadata.schema_classes.DataProductAssociationClass(destinationUrn, sourceUrn = None, created = None, lastModified = None, properties = None, outputPort = None)

Bases: DictWrapper

Represents an association of assets to a Data Product.

  • Parameters:
    • destinationUrn (str)
    • sourceUrn (Optional[str])
    • created (Optional[AuditStampClass]) –
    • lastModified (Optional[AuditStampClass]) –
    • properties (Optional[Dict[str, str]])
    • outputPort (Optional[bool])

property created : None | AuditStampClass

Audit stamp containing who created this relationship edge and when

property destinationUrn : str

Urn of the destination of this relationship edge.

property lastModified : None | AuditStampClass

Audit stamp containing who last modified this relationship edge and when

property outputPort : bool

If set to true, this asset is an output port of the Data Product.

property properties : None | Dict[str, str]

A generic properties bag that allows us to store specific information on this graph edge.

property sourceUrn : None | str

Urn of the source of this relationship edge. If not specified, assumed to be the entity that this aspect belongs to.

DataProductKeyClass

class datahub.metadata.schema_classes.DataProductKeyClass(id)

Bases: _Aspect

Key for a Data Product

  • Parameters:id (str)

property id : str

A unique id for the Data Product.

DataProductPropertiesClass

class datahub.metadata.schema_classes.DataProductPropertiesClass(customProperties = None, externalUrl = None, name = None, description = None, assets = None)

Bases: _Aspect

The main properties of a Data Product

  • Parameters:
    • customProperties (Optional[Dict[str, str]])
    • externalUrl (Optional[str])
    • name (Optional[str])
    • description (Optional[str])
    • assets (Optional[List[DataProductAssociationClass]]) –

property assets : None | List[DataProductAssociationClass]

A list of assets that are part of this Data Product

property customProperties : Dict[str, str]

Custom property bag.

property description : None | str

Documentation of the data product

property externalUrl : None | str

URL where the reference exists

property name : None | str

Display name of the Data Product

DataQualityContractClass

class datahub.metadata.schema_classes.DataQualityContractClass(assertion)

Bases: DictWrapper

A data quality contract pertaining to a physical data asset. Data Quality contracts are used to make assertions about data quality metrics for a physical data asset.

  • Parameters:assertion (str)

property assertion : str

The assertion representing the Data Quality contract. E.g. a table or column-level assertion.

DataTransformClass

class datahub.metadata.schema_classes.DataTransformClass(queryStatement = None)

Bases: DictWrapper

Information about a transformation, which may be defined by a query.

property queryStatement : None | QueryStatementClass

The data transform may be defined by a query statement

DataTransformLogicClass

class datahub.metadata.schema_classes.DataTransformLogicClass(transforms)

Bases: _Aspect

Information about a Query against one or more data assets (e.g. Tables or Views).

property transforms : List[DataTransformClass]

List of transformations applied

DataTypeInfoClass

class datahub.metadata.schema_classes.DataTypeInfoClass(qualifiedName, displayName = None, description = None)

Bases: _Aspect

  • Parameters:
    • qualifiedName (str)
    • displayName (Optional[str])
    • description (Optional[str])

property description : None | str

An optional description for the data type.

property displayName : None | str

An optional display name for the data type.

property qualifiedName : str

The qualified name for the data type. Usually a unique namespace + name, e.g. datahub.string

DataTypeKeyClass

class datahub.metadata.schema_classes.DataTypeKeyClass(id)

Bases: _Aspect

  • Parameters:id (str)

property id : str

A unique id for a data type. Usually this will be a unique namespace + data type name.

DatahubIngestionCheckpointClass

class datahub.metadata.schema_classes.DatahubIngestionCheckpointClass(timestampMillis, pipelineName, platformInstanceId, config, state, runId, eventGranularity = None, partitionSpec = None, messageId = None)

Bases: _Aspect

Checkpoint of a datahub ingestion run for a given job.

ASPECT_TYPE : ClassVar[str] = 'timeseries'

property config : str

JSON-encoded string representation of the non-secret members of the config.

property eventGranularity : None | TimeWindowSizeClass

Granularity of the event if applicable

property messageId : None | str

The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value.

property partitionSpec : PartitionSpecClass | None

The optional partition specification.

property pipelineName : str

The name of the pipeline that ran ingestion, a stable unique user provided identifier. e.g. my_snowflake1-to-datahub.

property platformInstanceId : str

The id of the instance against which the ingestion pipeline ran. e.g.: Bigquery project ids, MySQL hostnames etc.

property runId : str

The run identifier of this job.

property state : IngestionCheckpointStateClass

Opaque blob of the state representation.

property timestampMillis : int

The event timestamp field as epoch at UTC in milliseconds.

DatahubIngestionRunSummaryClass

class datahub.metadata.schema_classes.DatahubIngestionRunSummaryClass(timestampMillis, pipelineName, platformInstanceId, runId, runStatus, eventGranularity = None, partitionSpec = None, messageId = None, numWorkUnitsCommitted = None, numWorkUnitsCreated = None, numEvents = None, numEntities = None, numAspects = None, numSourceAPICalls = None, totalLatencySourceAPICalls = None, numSinkAPICalls = None, totalLatencySinkAPICalls = None, numWarnings = None, numErrors = None, numEntitiesSkipped = None, config = None, custom_summary = None, softwareVersion = None, systemHostName = None, operatingSystemName = None, numProcessors = None, totalMemory = None, availableMemory = None)

Bases: _Aspect

Summary of a datahub ingestion run for a given platform.

  • Parameters:
    • timestampMillis (int)
    • pipelineName (str)
    • platformInstanceId (str)
    • runId (str)
    • runStatus (Union[str, JobStatusClass]) –
    • eventGranularity (Optional[TimeWindowSizeClass]) –
    • partitionSpec (Optional[PartitionSpecClass]) –
    • messageId (Optional[str])
    • numWorkUnitsCommitted (Optional[int])
    • numWorkUnitsCreated (Optional[int])
    • numEvents (Optional[int])
    • numEntities (Optional[int])
    • numAspects (Optional[int])
    • numSourceAPICalls (Optional[int])
    • totalLatencySourceAPICalls (Optional[int])
    • numSinkAPICalls (Optional[int])
    • totalLatencySinkAPICalls (Optional[int])
    • numWarnings (Optional[int])
    • numErrors (Optional[int])
    • numEntitiesSkipped (Optional[int])
    • config (Optional[str])
    • custom_summary (Optional[str])
    • softwareVersion (Optional[str])
    • systemHostName (Optional[str])
    • operatingSystemName (Optional[str])
    • numProcessors (Optional[int])
    • totalMemory (Optional[int])
    • availableMemory (Optional[int])

ASPECT_TYPE : ClassVar[str] = 'timeseries'

property availableMemory : None | int

The available memory on the host the ingestion pipeline ran on.

property config : None | str

The non-sensitive key-value pairs of the yaml config used as json string.

property custom_summary : None | str

Custom value.

property eventGranularity : None | TimeWindowSizeClass

Granularity of the event if applicable

property messageId : None | str

The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value.

property numAspects : None | int

The total number of aspects produced across all entities.

property numEntities : None | int

The total number of entities produced (unique entity urns).

property numEntitiesSkipped : None | int

Number of entities skipped.

property numErrors : None | int

Number of errors generated.

property numEvents : None | int

The number of events produced (MCE + MCP).

property numProcessors : None | int

The number of processors on the host the ingestion pipeline ran on.

property numSinkAPICalls : None | int

Total number of sink API calls.

property numSourceAPICalls : None | int

Total number of source API calls.

property numWarnings : None | int

Number of warnings generated.

property numWorkUnitsCommitted : None | int

The number of workunits written to sink.

property numWorkUnitsCreated : None | int

The number of workunits that are produced.

property operatingSystemName : None | str

The os the ingestion pipeline ran on.

property partitionSpec : PartitionSpecClass | None

The optional partition specification.

property pipelineName : str

The name of the pipeline that ran ingestion, a stable unique user provided identifier. e.g. my_snowflake1-to-datahub.

property platformInstanceId : str

The id of the instance against which the ingestion pipeline ran. e.g.: Bigquery project ids, MySQL hostnames etc.

property runId : str

The runId for this pipeline instance.

property runStatus : str | JobStatusClass

Run Status - Succeeded/Skipped/Failed etc.

property softwareVersion : None | str

The software version of this ingestion.

property systemHostName : None | str

The hostname the ingestion pipeline ran on.

property timestampMillis : int

The event timestamp field as epoch at UTC in milliseconds.

property totalLatencySinkAPICalls : None | int

Total latency across all sink API calls.

property totalLatencySourceAPICalls : None | int

Total latency across all source API calls.

property totalMemory : None | int

The total amount of memory on the host the ingestion pipeline ran on.

DatasetAssertionInfoClass

class datahub.metadata.schema_classes.DatasetAssertionInfoClass(dataset, scope, operator, fields = None, aggregation = None, parameters = None, nativeType = None, nativeParameters = None, logic = None)

Bases: DictWrapper

Attributes that are applicable to single-Dataset Assertions

property aggregation : None | str | AssertionStdAggregationClass

Standardized assertion operator. This field is left blank if there is no selected aggregation or metric for a particular column.

property dataset : str

The dataset targeted by this assertion.

property fields : None | List[str]

One or more dataset schema fields that are targeted by this assertion.

This field is expected to be provided if the assertion scope is DATASET_COLUMN.

property logic : None | str

property nativeParameters : None | Dict[str, str]

Native parameters required for the assertion.

property nativeType : None | str

Native assertion type

property operator : str | AssertionStdOperatorClass

Standardized assertion operator

property parameters : None | AssertionStdParametersClass

Standard parameters required for the assertion. e.g. min_value, max_value, value, columns

property scope : str | DatasetAssertionScopeClass

Scope of the Assertion. What part of the dataset does this assertion apply to?

DatasetAssertionScopeClass

class datahub.metadata.schema_classes.DatasetAssertionScopeClass()

Bases: object

DATASET_COLUMN = 'DATASET_COLUMN'

DATASET_ROWS = 'DATASET_ROWS'

DATASET_SCHEMA = 'DATASET_SCHEMA'

DATASET_STORAGE_SIZE = 'DATASET_STORAGE_SIZE'

UNKNOWN = 'UNKNOWN'

DatasetDeprecationClass

class datahub.metadata.schema_classes.DatasetDeprecationClass(deprecated, note, decommissionTime = None, actor = None)

Bases: _Aspect

Dataset deprecation status. Deprecated! This aspect is deprecated in favor of the more-general-purpose ‘Deprecation’ aspect.

  • Parameters:
    • deprecated (bool)
    • note (str)
    • decommissionTime (Optional[int])
    • actor (Optional[str])

property actor : None | str

The corpuser URN which will be credited for modifying this deprecation content.

property decommissionTime : None | int

The time at which the user plans to decommission this dataset.

property deprecated : bool

Whether the dataset is deprecated by owner.

property note : str

Additional information about the dataset deprecation plan, such as the wiki, doc, RB.

DatasetFieldForeignKeyClass

class datahub.metadata.schema_classes.DatasetFieldForeignKeyClass(parentDataset, currentFieldPaths, parentField)

Bases: DictWrapper

For non-urn based foreign keys.

  • Parameters:
    • parentDataset (str)
    • currentFieldPaths (List[str])
    • parentField (str)

property currentFieldPaths : List[str]

List of fields in the hosting (current) SchemaMetadata that form a foreign key. The list can contain a single entry, or multiple entries if several fields in the hosting schema together form a foreign key to a single parent dataset.

property parentDataset : str

dataset that stores the resource.

property parentField : str

SchemaField@fieldPath that uniquely identifies the field in the parent dataset that this field references.

DatasetFieldMappingClass

class datahub.metadata.schema_classes.DatasetFieldMappingClass(created, transformation, sourceFields, destinationField)

Bases: DictWrapper

Representation of mapping between fields in source dataset to the field in destination dataset

property created : AuditStampClass

Audit stamp containing who reported the field mapping and when

property destinationField : str

Destination field which is derived from source fields

property sourceFields : List[str]

Source fields from which the fine grained lineage is derived

property transformation : str | TransformationTypeClass | UDFTransformerClass

Transformation function between the fields involved

DatasetFieldProfileClass

class datahub.metadata.schema_classes.DatasetFieldProfileClass(fieldPath, uniqueCount = None, uniqueProportion = None, nullCount = None, nullProportion = None, min = None, max = None, mean = None, median = None, stdev = None, quantiles = None, distinctValueFrequencies = None, histogram = None, sampleValues = None)

Bases: DictWrapper

Stats corresponding to fields in a dataset

  • Parameters:
    • fieldPath (str)
    • uniqueCount (Optional[int])
    • uniqueProportion (Optional[float])
    • nullCount (Optional[int])
    • nullProportion (Optional[float])
    • min (Optional[str])
    • max (Optional[str])
    • mean (Optional[str])
    • median (Optional[str])
    • stdev (Optional[str])
    • quantiles (Optional[List[QuantileClass]]) –
    • distinctValueFrequencies (Optional[List[ValueFrequencyClass]]) –
    • histogram (Optional[HistogramClass]) –
    • sampleValues (Optional[List[str]])

property distinctValueFrequencies : None | List[ValueFrequencyClass]

property fieldPath : str

property histogram : None | HistogramClass

property max : None | str

property mean : None | str

property median : None | str

property min : None | str

property nullCount : None | int

property nullProportion : None | float

property quantiles : None | List[QuantileClass]

property sampleValues : None | List[str]

property stdev : None | str

property uniqueCount : None | int

property uniqueProportion : None | float

DatasetFieldUsageCountsClass

class datahub.metadata.schema_classes.DatasetFieldUsageCountsClass(fieldPath, count)

Bases: DictWrapper

Records field-level usage counts for a given dataset

  • Parameters:
    • fieldPath (str)
    • count (int)

property count : int

Number of times the field has been used.

property fieldPath : str

The name of the field.

DatasetFilterClass

class datahub.metadata.schema_classes.DatasetFilterClass(type, sql = None)

Bases: DictWrapper

A definition of filters that should be used when querying an external Dataset or Table.

Note that this model should NOT be used for working with search / filter on the DataHub Platform itself.

property sql : None | str

The raw where clause string which will be used for monitoring. Required if the type is SQL.

property type : str | DatasetFilterTypeClass

How the partition will be represented in this model.

In the future, we’ll likely add support for more structured predicates.

DatasetFilterTypeClass

class datahub.metadata.schema_classes.DatasetFilterTypeClass()

Bases: object

SQL = 'SQL'

DatasetKeyClass

class datahub.metadata.schema_classes.DatasetKeyClass(platform, name, origin)

Bases: _Aspect

Key for a Dataset

  • Parameters:
    • platform (str)
    • name (str)
    • origin (Union[str, FabricTypeClass])

property name : str

Unique guid for dataset

property origin : str | FabricTypeClass

Fabric type where dataset belongs to or where it was generated.

property platform : str

Data platform urn associated with the dataset

DatasetLineageTypeClass

class datahub.metadata.schema_classes.DatasetLineageTypeClass()

Bases: object

The various types of supported dataset lineage

COPY = 'COPY'

TRANSFORMED = 'TRANSFORMED'

VIEW = 'VIEW'

DatasetProfileClass

class datahub.metadata.schema_classes.DatasetProfileClass(timestampMillis, eventGranularity = None, partitionSpec = None, messageId = None, rowCount = None, columnCount = None, fieldProfiles = None, sizeInBytes = None)

Bases: _Aspect

Stats corresponding to datasets

ASPECT_TYPE : ClassVar[str] = 'timeseries'

property columnCount : None | int

The total number of columns (or schema fields)

property eventGranularity : None | TimeWindowSizeClass

Granularity of the event if applicable

property fieldProfiles : None | List[DatasetFieldProfileClass]

Profiles for each column (or schema field)

property messageId : None | str

The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value.

property partitionSpec : PartitionSpecClass | None

The optional partition specification.

property rowCount : None | int

The total number of rows

property sizeInBytes : None | int

Storage size in bytes

property timestampMillis : int

The event timestamp field as epoch at UTC in milliseconds.

DatasetPropertiesClass

class datahub.metadata.schema_classes.DatasetPropertiesClass(customProperties = None, externalUrl = None, name = None, qualifiedName = None, description = None, uri = None, created = None, lastModified = None, tags = None)

Bases: _Aspect

Properties associated with a Dataset

  • Parameters:
    • customProperties (Optional[Dict[str, str]])
    • externalUrl (Optional[str])
    • name (Optional[str])
    • qualifiedName (Optional[str])
    • description (Optional[str])
    • uri (Optional[str])
    • created (Optional[TimeStampClass]) –
    • lastModified (Optional[TimeStampClass]) –
    • tags (Optional[List[str]])

property created : None | TimeStampClass

A timestamp documenting when the asset was created in the source Data Platform (not on DataHub)

property customProperties : Dict[str, str]

Custom property bag.

property description : None | str

Documentation of the dataset

property externalUrl : None | str

URL where the reference exists

property lastModified : None | TimeStampClass

A timestamp documenting when the asset was last modified in the source Data Platform (not on DataHub)

property name : None | str

Display name of the Dataset

property qualifiedName : None | str

Fully-qualified name of the Dataset

property tags : List[str]

[Legacy] Unstructured tags for the dataset. Structured tags can be applied via the GlobalTags aspect. This is now deprecated.

property uri : None | str

The abstracted URI such as hdfs:///data/tracking/PageViewEvent, file:///dir/file_name. Uri should not include any environment specific properties. Some datasets might not have a standardized uri, which makes this field optional (e.g. kafka topic).

DatasetSnapshotClass

class datahub.metadata.schema_classes.DatasetSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for a specific dataset entity.

property aspects : List[DatasetKeyClass | DatasetPropertiesClass | EditableDatasetPropertiesClass | DatasetDeprecationClass | DatasetUpstreamLineageClass | UpstreamLineageClass | InstitutionalMemoryClass | OwnershipClass | StatusClass | SchemaMetadataClass | EditableSchemaMetadataClass | GlobalTagsClass | GlossaryTermsClass | BrowsePathsClass | DataPlatformInstanceClass | ViewPropertiesClass | BrowsePathsV2Class]

The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

DatasetUpstreamLineageClass

class datahub.metadata.schema_classes.DatasetUpstreamLineageClass(fieldMappings)

Bases: _Aspect

Fine Grained upstream lineage for fields in a dataset

property fieldMappings : List[DatasetFieldMappingClass]

Upstream to downstream field level lineage mappings

DatasetUsageStatisticsClass

class datahub.metadata.schema_classes.DatasetUsageStatisticsClass(timestampMillis, eventGranularity = None, partitionSpec = None, messageId = None, uniqueUserCount = None, totalSqlQueries = None, topSqlQueries = None, userCounts = None, fieldCounts = None)

Bases: _Aspect

Stats corresponding to dataset’s usage.

ASPECT_TYPE : ClassVar[str] = 'timeseries'

property eventGranularity : None | TimeWindowSizeClass

Granularity of the event if applicable

property fieldCounts : None | List[DatasetFieldUsageCountsClass]

Field-level usage stats

property messageId : None | str

The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value.

property partitionSpec : PartitionSpecClass | None

The optional partition specification.

property timestampMillis : int

The event timestamp field as epoch at UTC in milliseconds.

property topSqlQueries : None | List[str]

Frequent SQL queries; mostly makes sense for datasets in SQL databases

property totalSqlQueries : None | int

Total SQL query count

property uniqueUserCount : None | int

Unique user count

property userCounts : None | List[DatasetUserUsageCountsClass]

Users within this bucket, with frequency counts

DatasetUserUsageCountsClass

class datahub.metadata.schema_classes.DatasetUserUsageCountsClass(user, count, userEmail = None)

Bases: DictWrapper

Records a single user’s usage counts for a given resource

  • Parameters:
    • user (str)
    • count (int)
    • userEmail (Optional[str])

property count : int

Number of times the dataset has been used by the user.

property user : str

The unique id of the user.

property userEmail : None | str

If user_email is set, we attempt to resolve the user’s urn upon ingest

DateTypeClass

class datahub.metadata.schema_classes.DateTypeClass()

Bases: DictWrapper

Date field type.

DeploymentStatusClass

class datahub.metadata.schema_classes.DeploymentStatusClass()

Bases: object

Model endpoint statuses

CREATING = 'CREATING'

DELETING = 'DELETING'

FAILED = 'FAILED'

IN_SERVICE = 'IN_SERVICE'

OUT_OF_SERVICE = 'OUT_OF_SERVICE'

ROLLING_BACK = 'ROLLING_BACK'

UNKNOWN = 'UNKNOWN'

UPDATING = 'UPDATING'

DeprecationClass

class datahub.metadata.schema_classes.DeprecationClass(deprecated, note, actor, decommissionTime = None, replacement = None)

Bases: _Aspect

Deprecation status of an entity

  • Parameters:
    • deprecated (bool)
    • note (str)
    • actor (str)
    • decommissionTime (Optional[int])
    • replacement (Optional[str])

property actor : str

The user URN which will be credited for modifying this deprecation content.

property decommissionTime : None | int

The time at which the user plans to decommission this entity.

property deprecated : bool

Whether the entity is deprecated.

property note : str

Additional information about the entity deprecation plan, such as the wiki, doc, RB.

property replacement : None | str

DisplayPropertiesClass

class datahub.metadata.schema_classes.DisplayPropertiesClass(colorHex = None, icon = None)

Bases: _Aspect

Properties related to how the entity is displayed in the Datahub UI

property colorHex : None | str

The color associated with the entity in Hex. For example #FFFFFF.

property icon : None | IconPropertiesClass

The icon associated with the entity

DocPropagationFeatureSettingsClass

class datahub.metadata.schema_classes.DocPropagationFeatureSettingsClass(enabled, config = None, configVersion = None, columnPropagationEnabled = None)

Bases: DictWrapper

  • Parameters:
    • enabled (bool)
    • config (Optional[str])
    • configVersion (Optional[str])
    • columnPropagationEnabled (Optional[bool])

property columnPropagationEnabled : bool

property config : None | str

The configuration for the feature, in JSON format.

property configVersion : None | str

The version of the configuration schema that has been used to serialize the config.

If not provided, the version is assumed to be the latest version.

property enabled : bool

DocumentationAssociationClass

class datahub.metadata.schema_classes.DocumentationAssociationClass(documentation, attribution = None)

Bases: DictWrapper

Properties of applied documentation including the attribution of the doc

property attribution : None | MetadataAttributionClass

Information about who, why, and how this metadata was applied

property documentation : str

Description of this asset

DocumentationClass

class datahub.metadata.schema_classes.DocumentationClass(documentations)

Bases: _Aspect

Aspect used for storing all applicable documentations on assets. This aspect supports multiple documentations from different sources. There is an implicit assumption that there is only one documentation per source.

For example, if there are two documentations from the same source, the latest one will overwrite the previous one.

If there are two documentations from different sources, both will be stored.

Future evolution considerations: The first entity that uses this aspect is Schema Field. We will expand this aspect to other entities eventually.

The values of the documentation are not currently searchable. This will be changed once this aspect develops an opinion on which documentation entry is the authoritative one.

Ensuring that there is only one documentation per source is a business rule that is not enforced by the aspect yet. This will currently be enforced by the application that uses this aspect. We will eventually enforce this rule in the aspect using AspectMutators.

property documentations : List[DocumentationAssociationClass]

Documentations associated with this asset. We could be receiving docs from different sources

DomainKeyClass

class datahub.metadata.schema_classes.DomainKeyClass(id)

Bases: _Aspect

Key for an Asset Domain

  • Parameters:id (str)

property id : str

A unique id for the domain. Should be separate from the name used for displaying a Domain.

DomainPropertiesClass

class datahub.metadata.schema_classes.DomainPropertiesClass(name, customProperties = None, description = None, created = None, parentDomain = None)

Bases: _Aspect

Information about a Domain

  • Parameters:
    • name (str)
    • customProperties (Optional[Dict[str, str]])
    • description (Optional[str])
    • created (Optional[AuditStampClass]) –
    • parentDomain (Optional[str])

property created : None | AuditStampClass

Created Audit stamp

property customProperties : Dict[str, str]

Custom property bag.

property description : None | str

Description of the Domain

property name : str

Display name of the Domain

property parentDomain : None | str

Optional parent of the domain.

DomainsClass

class datahub.metadata.schema_classes.DomainsClass(domains)

Bases: _Aspect

Links from an Asset to its Domains

  • Parameters:domains (List[str])

property domains : List[str]

The Domains attached to an Asset

DynamicFormAssignmentClass

class datahub.metadata.schema_classes.DynamicFormAssignmentClass(filter)

Bases: _Aspect

Information about how a form is assigned to entities dynamically. Provide a filter to match a set of entities instead of explicitly applying a form to specific entities.

property filter : FilterClass

The filter applied when assigning this form to entities. Entities that match this filter will have this form applied to them. Right now this filter only supports filtering by platform, entity type, container, and domain through the UI.

ERModelRelationshipCardinalityClass

class datahub.metadata.schema_classes.ERModelRelationshipCardinalityClass()

Bases: object

N_N = 'N_N'

N_ONE = 'N_ONE'

ONE_N = 'ONE_N'

ONE_ONE = 'ONE_ONE'

ERModelRelationshipKeyClass

class datahub.metadata.schema_classes.ERModelRelationshipKeyClass(id)

Bases: _Aspect

Key for an ERModelRelationship

  • Parameters:id (str)

property id : str

ERModelRelationshipPropertiesClass

class datahub.metadata.schema_classes.ERModelRelationshipPropertiesClass(name, source, destination, relationshipFieldMappings, customProperties = None, created = None, lastModified = None, cardinality = None)

Bases: _Aspect

Properties associated with an ERModelRelationship

property cardinality : str | ERModelRelationshipCardinalityClass

Cardinality of the relationship

property created : None | AuditStampClass

A timestamp documenting when the asset was created in the source Data Platform (not on DataHub)

property customProperties : Dict[str, str]

Custom property bag.

property destination : str

Second dataset in the erModelRelationship (no directionality)

property lastModified : None | AuditStampClass

A timestamp documenting when the asset was last modified in the source Data Platform (not on DataHub)

property name : str

Name of the ERModelRelation

property relationshipFieldMappings : List[RelationshipFieldMappingClass]

ERModelRelationFieldMapping (in future we can make it an array)

property source : str

First dataset in the erModelRelationship (no directionality)

EdgeClass

class datahub.metadata.schema_classes.EdgeClass(destinationUrn, sourceUrn = None, created = None, lastModified = None, properties = None)

Bases: DictWrapper

A common structure to represent all edges to entities when used inside aspects as collections. This ensures that all edges have a common structure around audit-stamps and will support PATCH and time-travel automatically.

  • Parameters:
    • destinationUrn (str)
    • sourceUrn (Optional[str])
    • created (Optional[AuditStampClass]) –
    • lastModified (Optional[AuditStampClass]) –
    • properties (Optional[Dict[str, str]])

property created : None | AuditStampClass

Audit stamp containing who created this relationship edge and when

property destinationUrn : str

Urn of the destination of this relationship edge.

property lastModified : None | AuditStampClass

Audit stamp containing who last modified this relationship edge and when

property properties : None | Dict[str, str]

A generic properties bag that allows us to store specific information on this graph edge.

property sourceUrn : None | str

Urn of the source of this relationship edge. If not specified, assumed to be the entity that this aspect belongs to.

EditableChartPropertiesClass

class datahub.metadata.schema_classes.EditableChartPropertiesClass(created = None, lastModified = None, deleted = None, description = None)

Bases: _Aspect

Stores editable changes made to properties. This separates changes made from ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines

property created : AuditStampClass

An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.

property deleted : None | AuditStampClass

An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.

property description : None | str

Edited documentation of the chart

property lastModified : AuditStampClass

An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.

EditableContainerPropertiesClass

class datahub.metadata.schema_classes.EditableContainerPropertiesClass(description = None)

Bases: _Aspect

Editable information about an Asset Container as defined on the DataHub Platform

  • Parameters:description (Optional[str])

property description : None | str

Description of the Asset Container as it is received on the DataHub Platform

EditableDashboardPropertiesClass

class datahub.metadata.schema_classes.EditableDashboardPropertiesClass(created = None, lastModified = None, deleted = None, description = None)

Bases: _Aspect

Stores editable changes made to properties. This separates changes made from ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines

property created : AuditStampClass

An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.

property deleted : None | AuditStampClass

An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.

property description : None | str

Edited documentation of the dashboard

property lastModified : AuditStampClass

An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.

EditableDataFlowPropertiesClass

class datahub.metadata.schema_classes.EditableDataFlowPropertiesClass(created = None, lastModified = None, deleted = None, description = None)

Bases: _Aspect

Stores editable changes made to properties. This separates changes made from ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines

property created : AuditStampClass

An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.

property deleted : None | AuditStampClass

An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.

property description : None | str

Edited documentation of the data flow

property lastModified : AuditStampClass

An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.

EditableDataJobPropertiesClass

class datahub.metadata.schema_classes.EditableDataJobPropertiesClass(created = None, lastModified = None, deleted = None, description = None)

Bases: _Aspect

Stores editable changes made to properties. This separates changes made from ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines

property created : AuditStampClass

An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.

property deleted : None | AuditStampClass

An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.

property description : None | str

Edited documentation of the data job

property lastModified : AuditStampClass

An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.

EditableDatasetPropertiesClass

class datahub.metadata.schema_classes.EditableDatasetPropertiesClass(created = None, lastModified = None, deleted = None, description = None, name = None)

Bases: _Aspect

EditableDatasetProperties stores editable changes made to dataset properties. This separates changes made from ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines

property created : AuditStampClass

An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.

property deleted : None | AuditStampClass

An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.

property description : None | str

Documentation of the dataset

property lastModified : AuditStampClass

An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.

property name : None | str

Editable display name of the Dataset

EditableERModelRelationshipPropertiesClass

class datahub.metadata.schema_classes.EditableERModelRelationshipPropertiesClass(created = None, lastModified = None, deleted = None, description = None, name = None)

Bases: _Aspect

EditableERModelRelationProperties stores editable changes made to erModelRelationship properties. This separates changes made from ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines

property created : AuditStampClass

An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.

property deleted : None | AuditStampClass

An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.

property description : None | str

Documentation of the erModelRelationship

property lastModified : AuditStampClass

An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.

property name : None | str

Display name of the ERModelRelation

EditableMLFeaturePropertiesClass

class datahub.metadata.schema_classes.EditableMLFeaturePropertiesClass(description = None)

Bases: _Aspect

Properties associated with a MLFeature editable from the UI

  • Parameters:description (Optional[str])

property description : None | str

Documentation of the MLFeature

EditableMLFeatureTablePropertiesClass

class datahub.metadata.schema_classes.EditableMLFeatureTablePropertiesClass(description = None)

Bases: _Aspect

Properties associated with an MLFeatureTable editable from the UI

  • Parameters:description (Optional[str])

property description : None | str

Documentation of the MLFeatureTable

EditableMLModelGroupPropertiesClass

class datahub.metadata.schema_classes.EditableMLModelGroupPropertiesClass(description = None)

Bases: _Aspect

Properties associated with an ML Model Group editable from the UI

  • Parameters:description (Optional[str])

property description : None | str

Documentation of the ml model group

EditableMLModelPropertiesClass

class datahub.metadata.schema_classes.EditableMLModelPropertiesClass(description = None)

Bases: _Aspect

Properties associated with an ML Model editable from the UI

  • Parameters:description (Optional[str])

property description : None | str

Documentation of the ml model

EditableMLPrimaryKeyPropertiesClass

class datahub.metadata.schema_classes.EditableMLPrimaryKeyPropertiesClass(description = None)

Bases: _Aspect

Properties associated with a MLPrimaryKey editable from the UI

  • Parameters:description (Optional[str])

property description : None | str

Documentation of the MLPrimaryKey

EditableNotebookPropertiesClass

class datahub.metadata.schema_classes.EditableNotebookPropertiesClass(created = None, lastModified = None, deleted = None, description = None)

Bases: _Aspect

Stores editable changes made to properties. This separates changes made from ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines. Note: This is in BETA.

property created : AuditStampClass

An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.

property deleted : None | AuditStampClass

An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.

property description : None | str

Edited documentation of the Notebook

property lastModified : AuditStampClass

An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.

EditableSchemaFieldInfoClass

class datahub.metadata.schema_classes.EditableSchemaFieldInfoClass(fieldPath, description = None, globalTags = None, glossaryTerms = None)

Bases: DictWrapper

SchemaField to describe metadata related to dataset schema.

property description : None | str

Description

property fieldPath : str

FieldPath uniquely identifying the SchemaField this metadata is associated with

property globalTags : None | GlobalTagsClass

Tags associated with the field

property glossaryTerms : None | GlossaryTermsClass

Glossary terms associated with the field

EditableSchemaMetadataClass

class datahub.metadata.schema_classes.EditableSchemaMetadataClass(editableSchemaFieldInfo, created = None, lastModified = None, deleted = None)

Bases: _Aspect

EditableSchemaMetadata stores editable changes made to schema metadata. This separates changes made from ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines.

property created : AuditStampClass

An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.

property deleted : None | AuditStampClass

An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.

property editableSchemaFieldInfo : List[EditableSchemaFieldInfoClass]

Client-provided list of fields from the document schema.

property lastModified : AuditStampClass

An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.

EmailNotificationSettingsClass

class datahub.metadata.schema_classes.EmailNotificationSettingsClass(email)

Bases: DictWrapper

Email Notification settings for an actor.

  • Parameters:email (str)

property email : str

Optional user or group email address

EmbedClass

class datahub.metadata.schema_classes.EmbedClass(renderUrl = None)

Bases: _Aspect

Information regarding rendering an embed for an asset.

  • Parameters:renderUrl (Optional[str])

property renderUrl : None | str

An embed URL to be rendered inside of an iframe.

EntityChangeEventClass

class datahub.metadata.schema_classes.EntityChangeEventClass(entityType, entityUrn, category, operation, auditStamp, version, modifier = None, parameters = None)

Bases: DictWrapper

Shared fields for all entity change events.

  • Parameters:
    • entityType (str)
    • entityUrn (str)
    • category (str)
    • operation (str)
    • auditStamp (AuditStampClass) –
    • version (int)
    • modifier (Optional[str])
    • parameters (Optional[ParametersClass]) –

property auditStamp : AuditStampClass

Audit stamp of the operation

property category : str

The category type (TAG, GLOSSARY_TERM, OWNERSHIP, TECHNICAL_SCHEMA, etc). This is used to determine what the rest of the schema will look like.

property entityType : str

The type of the entity affected. Corresponds to the entity registry, e.g. ‘dataset’, ‘chart’, ‘dashboard’, etc.

property entityUrn : str

The urn of the entity which was affected.

property modifier : None | str

An optional modifier of the change, e.g. the urn of the tag or glossary term that was added or removed.

property operation : str

The operation type. This is used to determine what the rest of the schema will look like.

property parameters : None | ParametersClass

Arbitrary key-value parameters corresponding to the event.

property version : int

The version of the event type, incremented in integers.

EntityTypeInfoClass

class datahub.metadata.schema_classes.EntityTypeInfoClass(qualifiedName, displayName = None, description = None)

Bases: _Aspect

  • Parameters:
    • qualifiedName (str)
    • displayName (Optional[str])
    • description (Optional[str])

property description : None | str

A description for the Entity Type: what is it for?

property displayName : None | str

The display name for the Entity Type.

property qualifiedName : str

The fully qualified name for the entity type, which usually consists of a namespace plus an identifier or name, e.g. datahub.dataset

EntityTypeKeyClass

class datahub.metadata.schema_classes.EntityTypeKeyClass(id)

Bases: _Aspect

  • Parameters:id (str)

property id : str

A unique id for an entity type. Usually this will be a unique namespace + entity name.

EnumTypeClass

class datahub.metadata.schema_classes.EnumTypeClass()

Bases: DictWrapper

Enum field type.

EspressoSchemaClass

class datahub.metadata.schema_classes.EspressoSchemaClass(documentSchema, tableSchema)

Bases: DictWrapper

Schema text of an espresso table schema.

  • Parameters:
    • documentSchema (str)
    • tableSchema (str)

property documentSchema : str

The native espresso document schema.

property tableSchema : str

The espresso table schema definition.

EthicalConsiderationsClass

class datahub.metadata.schema_classes.EthicalConsiderationsClass(data = None, humanLife = None, mitigations = None, risksAndHarms = None, useCases = None)

Bases: _Aspect

This section is intended to demonstrate the ethical considerations that went into MLModel development, surfacing ethical challenges and solutions to stakeholders.

  • Parameters:
    • data (Optional[List[str]])
    • humanLife (Optional[List[str]])
    • mitigations (Optional[List[str]])
    • risksAndHarms (Optional[List[str]])
    • useCases (Optional[List[str]])

property data : None | List[str]

Does the MLModel use any sensitive data (e.g., protected classes)?

property humanLife : None | List[str]

Is the MLModel intended to inform decisions about matters central to human life or flourishing - e.g., health or safety? Or could it be used in such a way?

property mitigations : None | List[str]

What risk mitigation strategies were used during MLModel development?

property risksAndHarms : None | List[str]

What risks may be present in MLModel usage? Try to identify the potential recipients, likelihood, and magnitude of harms. If these cannot be determined, note that they were considered but remain unknown.

property useCases : None | List[str]

Are there any known MLModel use cases that are especially fraught? This may connect directly to the intended use section

EvaluationDataClass

class datahub.metadata.schema_classes.EvaluationDataClass(evaluationData)

Bases: _Aspect

All referenced datasets would ideally point to any set of documents that provide visibility into the source and composition of the dataset.

property evaluationData : List[BaseDataClass]

Details on the dataset(s) used for the quantitative analyses in the MLModel

ExecutionRequestInputClass

class datahub.metadata.schema_classes.ExecutionRequestInputClass(task, args, executorId, source, requestedAt, actorUrn = None)

Bases: _Aspect

A request to execute some remote logic or action. TODO: Determine who is responsible for emitting execution request success or failure. Executor?

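  • Parameters:
    • task (str)
    • args (Dict[str, str])
    • executorId (str)
    • source (ExecutionRequestSourceClass)
    • requestedAt (int)
    • actorUrn (Optional[str])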

property actorUrn : None | str

Urn of the actor who created this execution request.

property args : Dict[str, str]

Arguments provided to the task

property executorId : str

Advanced: specify a specific executor to route the request to. If none is provided, a “default” executor is used.

property requestedAt : int

Time at which the execution request input was created

property source : ExecutionRequestSourceClass

Source which created the execution request

property task : str

The name of the task to execute, for example RUN_INGEST

ExecutionRequestKeyClass

class datahub.metadata.schema_classes.ExecutionRequestKeyClass(id)

Bases: _Aspect

Key for a DataHub Execution Request

  • Parameters:id (str)

property id : str

A unique id for the DataHub execution request.

ExecutionRequestResultClass

class datahub.metadata.schema_classes.ExecutionRequestResultClass(status, report = None, structuredReport = None, startTimeMs = None, durationMs = None)

Bases: _Aspect

The result of an execution request

  • Parameters:
    • status (str)
    • report (Optional[str])
    • structuredReport (Optional[StructuredExecutionReportClass]) –
    • startTimeMs (Optional[int])
    • durationMs (Optional[int])

property durationMs : None | int

Duration in milliseconds

property report : None | str

The pretty-printed execution report.

property startTimeMs : None | int

Time at which the request was created

property status : str

The status of the execution request

property structuredReport : None | StructuredExecutionReportClass

A structured report if available.

ExecutionRequestSignalClass

class datahub.metadata.schema_classes.ExecutionRequestSignalClass(signal, createdAt, executorId = None)

Bases: _Aspect

A signal sent to a running execution request

  • Parameters:
    • signal (str)
    • createdAt (AuditStampClass)
    • executorId (Optional[str])

property createdAt : AuditStampClass

Audit Stamp

property executorId : None | str

Advanced: specify a specific executor to route the request to. If none is provided, a “default” executor is used.

property signal : str

The signal to issue, e.g. KILL

ExecutionRequestSourceClass

class datahub.metadata.schema_classes.ExecutionRequestSourceClass(type, ingestionSource = None)

Bases: DictWrapper

  • Parameters:
    • type (str)
    • ingestionSource (Optional[str])

property ingestionSource : None | str

The urn of the ingestion source associated with the ingestion request. Present if type is INGESTION_SOURCE

property type : str

The type of the execution request source, e.g. INGESTION_SOURCE

FabricTypeClass

class datahub.metadata.schema_classes.FabricTypeClass()

Bases: object

Fabric group type

CORP = 'CORP'

DEV = 'DEV'

EI = 'EI'

NON_PROD = 'NON_PROD'

PRD = 'PRD'

PRE = 'PRE'

PROD = 'PROD'

QA = 'QA'

RVW = 'RVW'

SANDBOX = 'SANDBOX'

SBX = 'SBX'

SIT = 'SIT'

STG = 'STG'

TEST = 'TEST'

TST = 'TST'

UAT = 'UAT'

FieldAssertionInfoClass

class datahub.metadata.schema_classes.FieldAssertionInfoClass(type, entity, fieldValuesAssertion = None, fieldMetricAssertion = None, filter = None)

Bases: DictWrapper

Attributes defining a Field Assertion.

property entity : str

The entity targeted by this Field check.

property fieldMetricAssertion : None | FieldMetricAssertionClass

The definition of an assertion that validates a common metric obtained about a field / column for a set of rows. This type of assertion verifies that the value of a high-level metric obtained by aggregating over a column meets expectations

property fieldValuesAssertion : None | FieldValuesAssertionClass

The definition of an assertion that validates individual values of a field / column for a set of rows. This type of assertion verifies that each column value meets a particular requirement.

property filter : None | DatasetFilterClass

A definition of the specific filters that should be applied, when performing monitoring. If not provided, there is no filter, and the full table is under consideration. If using DataHub Dataset Profiles as the assertion source type, the value of this field will be ignored.

property type : str | FieldAssertionTypeClass

The type of the field assertion being monitored.

FieldAssertionTypeClass

class datahub.metadata.schema_classes.FieldAssertionTypeClass()

Bases: object

FIELD_METRIC = 'FIELD_METRIC'

FIELD_VALUES = 'FIELD_VALUES'

FieldFormPromptAssociationClass

class datahub.metadata.schema_classes.FieldFormPromptAssociationClass(fieldPath, lastModified)

Bases: DictWrapper

Information about the status of a particular prompt for a specific schema field on an entity.

property fieldPath : str

The field path on a schema field.

property lastModified : AuditStampClass

The last time this prompt was touched for the field on the entity (set, unset)

FieldMetricAssertionClass

class datahub.metadata.schema_classes.FieldMetricAssertionClass(field, metric, operator, parameters = None)

Bases: DictWrapper

Attributes defining a field metric assertion, which asserts an expectation against a common metric derived from the set of field / column values, for example: max, min, median, null count, null percentage, unique count, unique percentage, and more.

property field : SchemaFieldSpecClass

The field under evaluation

property metric : str | FieldMetricTypeClass

The specific metric to assert against. This is the value that will be obtained by applying a standard operation, such as an aggregation, to the selected field.

property operator : str | AssertionStdOperatorClass

The predicate to evaluate against the metric for the field / column. Depending on the operator, parameters may be required in order to successfully evaluate the assertion against the metric value.

property parameters : None | AssertionStdParametersClass

Standard parameters required for the assertion. e.g. min_value, max_value, value, columns

FieldMetricTypeClass

class datahub.metadata.schema_classes.FieldMetricTypeClass()

Bases: object

A standard metric that can be derived from the set of values for a specific field / column of a dataset / table.

EMPTY_COUNT = 'EMPTY_COUNT'

EMPTY_PERCENTAGE = 'EMPTY_PERCENTAGE'

MAX = 'MAX'

MAX_LENGTH = 'MAX_LENGTH'

MEAN = 'MEAN'

MEDIAN = 'MEDIAN'

MIN = 'MIN'

MIN_LENGTH = 'MIN_LENGTH'

NEGATIVE_COUNT = 'NEGATIVE_COUNT'

NEGATIVE_PERCENTAGE = 'NEGATIVE_PERCENTAGE'

NULL_COUNT = 'NULL_COUNT'

NULL_PERCENTAGE = 'NULL_PERCENTAGE'

STDDEV = 'STDDEV'

UNIQUE_COUNT = 'UNIQUE_COUNT'

UNIQUE_PERCENTAGE = 'UNIQUE_PERCENTAGE'

ZERO_COUNT = 'ZERO_COUNT'

ZERO_PERCENTAGE = 'ZERO_PERCENTAGE'

FieldTransformClass

class datahub.metadata.schema_classes.FieldTransformClass(type)

Bases: DictWrapper

Definition of a transform applied to the values of a column / field. Note that the applicability of a field transform ultimately depends on the native type of the field / column.

Model has single field to permit extension.

property type : str | FieldTransformTypeClass

The type of the field transform, e.g. the transformation function / operator to apply.

FieldTransformTypeClass

class datahub.metadata.schema_classes.FieldTransformTypeClass()

Bases: object

LENGTH = 'LENGTH'

FieldUsageCountsClass

class datahub.metadata.schema_classes.FieldUsageCountsClass(fieldName, count)

Bases: DictWrapper

Records field-level usage counts for a given resource

  • Parameters:
    • fieldName (str)
    • count (int)

property count : int

property fieldName : str

FieldValuesAssertionClass

class datahub.metadata.schema_classes.FieldValuesAssertionClass(field, operator, failThreshold, transform = None, parameters = None, excludeNulls = None)

Bases: DictWrapper

Attributes defining a field values assertion, which asserts that the values for a field / column of a dataset / table matches a set of expectations.

In other words, this type of assertion acts as a semantic constraint applied to fields for a specific column.

TODO: We should display the “failed row count” to the user if the column fails the verification rules. TODO: Determine whether we need an “operator” that can be applied to the field.

property excludeNulls : bool

Whether to ignore nulls (i.e. consider only non-null values) when running the values assertion using operators OTHER than the IS_NULL operator.

Defaults to true, allowing null values.

property failThreshold : FieldValuesFailThresholdClass

Additional customization about when the assertion should be officially considered failing.

property field : SchemaFieldSpecClass

The field under evaluation

property operator : str | AssertionStdOperatorClass

The predicate to evaluate against a single value of the field. Depending on the operator, parameters may be required in order to successfully evaluate the assertion against the field value.

property parameters : None | AssertionStdParametersClass

Standard parameters required for the assertion. e.g. min_value, max_value, value, columns

property transform : None | FieldTransformClass

An optional transform to apply to field values before evaluating the operator.

If none is applied, the field value will be compared as is.

FieldValuesFailThresholdClass

class datahub.metadata.schema_classes.FieldValuesFailThresholdClass(type = None, value = None)

Bases: DictWrapper

property type : str | FieldValuesFailThresholdTypeClass

The type of failure threshold. Either based on the number of column values (rows) that fail the expectations, or the percentage of the total rows under consideration.

property value : int

By default this is 0, meaning that ALL column values (i.e. rows) must meet the defined expectations.

FieldValuesFailThresholdTypeClass

class datahub.metadata.schema_classes.FieldValuesFailThresholdTypeClass()

Bases: object

COUNT = 'COUNT'

PERCENTAGE = 'PERCENTAGE'

FilterClass

class datahub.metadata.schema_classes.FilterClass(or_ = None, criteria = None)

Bases: DictWrapper

The filter for finding a record or a collection of records

property criteria : None | List[CriterionClass]

Deprecated! A list of conjunctive criteria for the filter. If the “or” field is provided, then this field is ignored.

property or_ : None | List[ConjunctiveCriterionClass]

A list of disjunctive criteria for the filter. (or operation to combine filters)

FineGrainedLineageClass

class datahub.metadata.schema_classes.FineGrainedLineageClass(upstreamType, downstreamType, upstreams = None, downstreams = None, transformOperation = None, confidenceScore = None, query = None)

Bases: DictWrapper

A fine-grained lineage from upstream fields/datasets to downstream field(s)

property confidenceScore : float

The confidence in this lineage between 0 (low confidence) and 1 (high confidence)

property downstreamType : str | FineGrainedLineageDownstreamTypeClass

The type of downstream field(s)

property downstreams : None | List[str]

Downstream fields in the lineage

property query : None | str

The query that was used to generate this lineage. Present only if the lineage was generated from a detected query.

property transformOperation : None | str

The transform operation applied to the upstream entities to produce the downstream field(s)

property upstreamType : str | FineGrainedLineageUpstreamTypeClass

The type of upstream entity

property upstreams : None | List[str]

Upstream entities in the lineage

FineGrainedLineageDownstreamTypeClass

class datahub.metadata.schema_classes.FineGrainedLineageDownstreamTypeClass()

Bases: object

The type of downstream field(s) in a fine-grained lineage

FIELD = 'FIELD'

FIELD_SET = 'FIELD_SET'

FineGrainedLineageUpstreamTypeClass

class datahub.metadata.schema_classes.FineGrainedLineageUpstreamTypeClass()

Bases: object

The type of upstream entity in a fine-grained lineage

DATASET = 'DATASET'

FIELD_SET = 'FIELD_SET'

NONE = 'NONE'

FixedIntervalScheduleClass

class datahub.metadata.schema_classes.FixedIntervalScheduleClass(unit, multiple = None)

Bases: DictWrapper

Attributes defining a relative fixed interval SLA schedule.

property multiple : int

How many units. Defaults to 1.

property unit : str | CalendarIntervalClass

Interval unit such as minute/hour/day etc.

FixedTypeClass

class datahub.metadata.schema_classes.FixedTypeClass()

Bases: DictWrapper

Fixed field type.

ForeignKeyConstraintClass

class datahub.metadata.schema_classes.ForeignKeyConstraintClass(name, foreignFields, sourceFields, foreignDataset)

Bases: DictWrapper

Description of a foreign key constraint in a schema.

  • Parameters:
    • name (str)
    • foreignFields (List[str])
    • sourceFields (List[str])
    • foreignDataset (str)

property foreignDataset : str

Reference to the foreign dataset for ease of lookup

property foreignFields : List[str]

Fields the constraint maps to on the foreign dataset

property name : str

Name of the constraint, likely provided from the source

property sourceFields : List[str]

Fields the constraint maps to on the source dataset

ForeignKeySpecClass

class datahub.metadata.schema_classes.ForeignKeySpecClass(foreignKey)

Bases: DictWrapper

Description of a foreign key in a schema.

property foreignKey : DatasetFieldForeignKeyClass | UrnForeignKeyClass

Foreign key definition in metadata schema.

FormActorAssignmentClass

class datahub.metadata.schema_classes.FormActorAssignmentClass(owners = None, groups = None, users = None)

Bases: DictWrapper

  • Parameters:
    • owners (Optional[bool])
    • groups (Optional[List[str]])
    • users (Optional[List[str]])

property groups : None | List[str]

Optional: Specific set of groups that are targeted by this form assignment.

property owners : bool

Whether the form should be assigned to the owners of assets that it is applied to. This is the default.

property users : None | List[str]

Optional: Specific set of users that are targeted by this form assignment.

FormAssociationClass

class datahub.metadata.schema_classes.FormAssociationClass(urn, incompletePrompts = None, completedPrompts = None)

Bases: DictWrapper

Properties of an applied form.

property completedPrompts : List[FormPromptAssociationClass]

A list of prompts that have been completed for this form.

property incompletePrompts : List[FormPromptAssociationClass]

A list of prompts that are not yet complete for this form.

property urn : str

Urn of the applied form

FormInfoClass

class datahub.metadata.schema_classes.FormInfoClass(name, description = None, type = None, prompts = None, actors = None)

Bases: _Aspect

Information about a form to help with filling out metadata on entities.

property actors : FormActorAssignmentClass

Who the form is assigned to, e.g. who should see the form when visiting the entity page or governance center

property description : None | str

Description of the form

property name : str

Display name of the form

property prompts : List[FormPromptClass]

List of prompts to present to the user to encourage filling out metadata

property type : str | FormTypeClass

The type of this form

FormKeyClass

class datahub.metadata.schema_classes.FormKeyClass(id)

Bases: _Aspect

Key for a Form

  • Parameters:id (str)

property id : str

Unique id for the form.

FormPromptAssociationClass

class datahub.metadata.schema_classes.FormPromptAssociationClass(id, lastModified, fieldAssociations = None)

Bases: DictWrapper

Information about the status of a particular prompt. Note that this is where we can add additional information about individual responses: actor, timestamp, and the response itself.

property fieldAssociations : None | FormPromptFieldAssociationsClass

Optional information about the field-level prompt associations.

property id : str

The id for the prompt. This must be GLOBALLY UNIQUE.

property lastModified : AuditStampClass

The last time this prompt was touched for the entity (set, unset)

FormPromptClass

class datahub.metadata.schema_classes.FormPromptClass(id, title, type, description = None, structuredPropertyParams = None, required = None)

Bases: DictWrapper

A prompt to present to the user to encourage filling out metadata

property description : None | str

The description of this prompt

property id : str

The unique id for this prompt. This must be GLOBALLY unique.

property required : bool

Whether the prompt is required to be completed, in order for the form to be marked as complete.

property structuredPropertyParams : None | StructuredPropertyParamsClass

An optional set of information specific to structured properties prompts. This should be filled out if the prompt is type STRUCTURED_PROPERTY or FIELDS_STRUCTURED_PROPERTY.

property title : str

The title of this prompt

property type : str | FormPromptTypeClass

The type of prompt

FormPromptFieldAssociationsClass

class datahub.metadata.schema_classes.FormPromptFieldAssociationsClass(completedFieldPrompts = None, incompleteFieldPrompts = None)

Bases: DictWrapper

Information about the field-level prompt associations on a top-level prompt association.

property completedFieldPrompts : None | List[FieldFormPromptAssociationClass]

A list of field-level prompt associations that are complete for this form.

property incompleteFieldPrompts : None | List[FieldFormPromptAssociationClass]

A list of field-level prompt associations that are not yet complete for this form.

FormPromptTypeClass

class datahub.metadata.schema_classes.FormPromptTypeClass()

Bases: object

FIELDS_STRUCTURED_PROPERTY = 'FIELDS_STRUCTURED_PROPERTY'

STRUCTURED_PROPERTY = 'STRUCTURED_PROPERTY'

FormTypeClass

class datahub.metadata.schema_classes.FormTypeClass()

Bases: object

COMPLETION = 'COMPLETION'

VERIFICATION = 'VERIFICATION'

FormVerificationAssociationClass

class datahub.metadata.schema_classes.FormVerificationAssociationClass(form, lastModified = None)

Bases: DictWrapper

An association between a verification and an entity that has been granted via completion of one or more forms of type ‘VERIFICATION’.

property form : str

The urn of the form that granted this verification.

property lastModified : None | AuditStampClass

An audit stamp capturing who and when verification was applied for this form.

FormsClass

class datahub.metadata.schema_classes.FormsClass(incompleteForms, completedForms, verifications = None)

Bases: _Aspect

Forms that are assigned to this entity to be filled out

property completedForms : List[FormAssociationClass]

All complete forms assigned to the entity.

property incompleteForms : List[FormAssociationClass]

All incomplete forms assigned to the entity.

property verifications : List[FormVerificationAssociationClass]

Verifications that have been applied to the entity via completed forms.

FreshnessAssertionInfoClass

class datahub.metadata.schema_classes.FreshnessAssertionInfoClass(type, entity, schedule, filter = None)

Bases: DictWrapper

Attributes defining a Freshness Assertion.

property entity : str

The entity targeted by this Freshness check.

property filter : None | DatasetFilterClass

A definition of the specific filters that should be applied, when performing monitoring. If not provided, there is no filter, and the full table is under consideration.

property schedule : FreshnessAssertionScheduleClass

Produce FAILURE Assertion Result if the asset is not updated on the cadence and within the time range described by the schedule.

property type : str | FreshnessAssertionTypeClass

The type of the freshness assertion being monitored.

FreshnessAssertionScheduleClass

class datahub.metadata.schema_classes.FreshnessAssertionScheduleClass(type, cron = None, fixedInterval = None)

Bases: DictWrapper

Attributes defining a single Freshness schedule.

property cron : None | FreshnessCronScheduleClass

A cron schedule. This field is required when type is CRON.

property fixedInterval : None | FixedIntervalScheduleClass

A fixed interval schedule. This field is required when type is FIXED_INTERVAL.

property type : str | FreshnessAssertionScheduleTypeClass

The type of a Freshness Assertion Schedule.

Once we support data-time-relative schedules (e.g. schedules relative to time partitions), we will add those schedule types here.

FreshnessAssertionScheduleTypeClass

class datahub.metadata.schema_classes.FreshnessAssertionScheduleTypeClass()

Bases: object

CRON = 'CRON'

FIXED_INTERVAL = 'FIXED_INTERVAL'

SINCE_THE_LAST_CHECK = 'SINCE_THE_LAST_CHECK'

FreshnessAssertionTypeClass

class datahub.metadata.schema_classes.FreshnessAssertionTypeClass()

Bases: object

DATASET_CHANGE = 'DATASET_CHANGE'

DATA_JOB_RUN = 'DATA_JOB_RUN'

FreshnessContractClass

class datahub.metadata.schema_classes.FreshnessContractClass(assertion)

Bases: DictWrapper

A contract pertaining to the operational SLAs of a physical data asset

  • Parameters:assertion (str)

property assertion : str

The assertion representing the SLA contract.

FreshnessCronScheduleClass

class datahub.metadata.schema_classes.FreshnessCronScheduleClass(cron, timezone, windowStartOffsetMs = None)

Bases: DictWrapper

Attributes defining a CRON-formatted schedule used for defining a freshness assertion.

  • Parameters:
    • cron (str)
    • timezone (str)
    • windowStartOffsetMs (Optional[int])

property cron : str

A cron-formatted execution interval, as a cron string, e.g. 1 * * * *

property timezone : str

Timezone in which the cron interval applies, e.g. America/Los_Angeles

property windowStartOffsetMs : None | int

An optional offset in milliseconds to SUBTRACT from the timestamp generated by the cron schedule to generate the lower bounds of the “freshness window”, or the window of time in which an event must have occurred in order for the Freshness check to be considered passing.

If left empty, the start of the SLA window will be the _end_ of the previously evaluated Freshness window.

GenericAspectClass

class datahub.metadata.schema_classes.GenericAspectClass(value, contentType)

Bases: DictWrapper

Generic record structure for serializing an Aspect

  • Parameters:
    • value (bytes)
    • contentType (str)

property contentType : str

The content type, which represents the fashion in which the aspect was serialized. The only type currently supported is application/json.

property value : bytes

The value of the aspect, serialized as bytes.

GenericPayloadClass

class datahub.metadata.schema_classes.GenericPayloadClass(value, contentType)

Bases: DictWrapper

Generic payload record structure for serializing a Platform Event.

  • Parameters:
    • value (bytes)
    • contentType (str)

property contentType : str

The content type, which represents the fashion in which the event was serialized. The only type currently supported is application/json.

property value : bytes

The value of the event, serialized as bytes.

GlobalSettingsInfoClass

class datahub.metadata.schema_classes.GlobalSettingsInfoClass(sso = None, views = None, docPropagation = None)

Bases: _Aspect

DataHub Global platform settings. Careful - these should not be modified by the outside world!

property docPropagation : DocPropagationFeatureSettingsClass | None

Settings related to the documentation propagation feature

property sso : None | SsoSettingsClass

SSO integrations between DataHub and identity providers

property views : None | GlobalViewsSettingsClass

Settings related to the Views Feature

GlobalSettingsKeyClass

class datahub.metadata.schema_classes.GlobalSettingsKeyClass(id)

Bases: _Aspect

Key for the Global Settings

  • Parameters:id (str)

property id : str

Id for the settings. There should be only 1 global settings urn: urn:li:globalSettings:0

GlobalTagsClass

class datahub.metadata.schema_classes.GlobalTagsClass(tags)

Bases: _Aspect

Tag aspect used for applying tags to an entity

property tags : List[TagAssociationClass]

Tags associated with a given entity

GlobalViewsSettingsClass

class datahub.metadata.schema_classes.GlobalViewsSettingsClass(defaultView = None)

Bases: DictWrapper

Settings for DataHub Views feature.

  • Parameters:defaultView (Optional[str])

property defaultView : None | str

The default View for the instance, or organization.

GlossaryNodeInfoClass

class datahub.metadata.schema_classes.GlossaryNodeInfoClass(definition, customProperties = None, parentNode = None, name = None, id = None)

Bases: _Aspect

Properties associated with a GlossaryNode

  • Parameters:
    • definition (str)
    • customProperties (Optional[Dict[str, str]])
    • parentNode (Optional[str])
    • name (Optional[str])
    • id (Optional[str])

property customProperties : Dict[str, str]

Custom property bag.

property definition : str

Definition of business node

property id : None | str

Optional id for the GlossaryNode

property name : None | str

Display name of the node

property parentNode : None | str

Parent node of the glossary node

GlossaryNodeKeyClass

class datahub.metadata.schema_classes.GlossaryNodeKeyClass(name)

Bases: _Aspect

Key for a GlossaryNode

  • Parameters:name (str)

property name : str

GlossaryNodeSnapshotClass

class datahub.metadata.schema_classes.GlossaryNodeSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for a specific GlossaryNode entity.

property aspects : List[GlossaryNodeKeyClass | GlossaryNodeInfoClass | OwnershipClass | StatusClass]

The list of metadata aspects associated with the GlossaryNode. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

GlossaryRelatedTermsClass

class datahub.metadata.schema_classes.GlossaryRelatedTermsClass(isRelatedTerms = None, hasRelatedTerms = None, values = None, relatedTerms = None)

Bases: _Aspect

Has A / Is A lineage information about a glossary Term reporting the lineage

  • Parameters:
    • isRelatedTerms (Optional[List[str]])
    • hasRelatedTerms (Optional[List[str]])
    • values (Optional[List[str]])
    • relatedTerms (Optional[List[str]])

property hasRelatedTerms : None | List[str]

The relationship Has A with glossary term

property isRelatedTerms : None | List[str]

The relationship Is A with glossary term

property relatedTerms : None | List[str]

The relationship isRelatedTo with glossary term

property values : None | List[str]

The relationship Has Value with glossary term. These are the fixed values a term has. For example, a ColorEnum where RED, GREEN and YELLOW are fixed values.

GlossaryTermAssociationClass

class datahub.metadata.schema_classes.GlossaryTermAssociationClass(urn, actor = None, context = None, attribution = None)

Bases: DictWrapper

Properties of an applied glossary term.

property actor : None | str

The user URN which will be credited for associating this term to the entity

property attribution : None | MetadataAttributionClass

Information about who, why, and how this metadata was applied

property context : None | str

Additional context about the association

property urn : str

Urn of the applied glossary term

GlossaryTermInfoClass

class datahub.metadata.schema_classes.GlossaryTermInfoClass(definition, termSource, customProperties = None, id = None, name = None, parentNode = None, sourceRef = None, sourceUrl = None, rawSchema = None)

Bases: _Aspect

Properties associated with a GlossaryTerm

  • Parameters:
    • definition (str)
    • termSource (str)
    • customProperties (Optional[Dict[str, str]])
    • id (Optional[str])
    • name (Optional[str])
    • parentNode (Optional[str])
    • sourceRef (Optional[str])
    • sourceUrl (Optional[str])
    • rawSchema (Optional[str])

property customProperties : Dict[str, str]

Custom property bag.

property definition : str

Definition of business term.

property id : None | str

Optional id for the term

property name : None | str

Display name of the term

property parentNode : None | str

Parent node of the glossary term

property rawSchema : None | str

Schema definition of the glossary term

property sourceRef : None | str

External Reference to the business-term

property sourceUrl : None | str

The abstracted URL such as https://spec.edmcouncil.org/fibo/ontology/FBC/FinancialInstruments/FinancialInstruments/CashInstrument.

property termSource : str

Source of the Business Term (INTERNAL or EXTERNAL) with default value as INTERNAL

GlossaryTermKeyClass

class datahub.metadata.schema_classes.GlossaryTermKeyClass(name)

Bases: _Aspect

Key for a GlossaryTerm

  • Parameters:name (str)

property name : str

The term name, which serves as a unique id

GlossaryTermSnapshotClass

class datahub.metadata.schema_classes.GlossaryTermSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for a specific GlossaryTerm entity.

property aspects : List[GlossaryTermKeyClass | GlossaryTermInfoClass | OwnershipClass | StatusClass | BrowsePathsClass | GlossaryRelatedTermsClass]

The list of metadata aspects associated with the GlossaryTerm. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

GlossaryTermsClass

class datahub.metadata.schema_classes.GlossaryTermsClass(terms, auditStamp)

Bases: _Aspect

Related business terms information

property auditStamp : AuditStampClass

Audit stamp containing who reported the related business term

property terms : List[GlossaryTermAssociationClass]

The related business terms

GroupMembershipClass

class datahub.metadata.schema_classes.GroupMembershipClass(groups)

Bases: _Aspect

Carries information about the CorpGroups a user is in.

  • Parameters:groups (List[str])

property groups : List[str]

HistogramClass

class datahub.metadata.schema_classes.HistogramClass(boundaries, heights)

Bases: DictWrapper

  • Parameters:
    • boundaries (List[str])
    • heights (List[float])

property boundaries : List[str]

property heights : List[float]

IcebergCatalogInfoClass

class datahub.metadata.schema_classes.IcebergCatalogInfoClass(metadataPointer = None, view = None)

Bases: _Aspect

Iceberg Catalog metadata associated with an Iceberg table/view

  • Parameters:
    • metadataPointer (Optional[str])
    • view (Optional[bool])

property metadataPointer : None | str

When Datahub is the REST Catalog for an Iceberg Table, stores the current metadata pointer. If the Iceberg table is managed by an external catalog, the metadata pointer is not set.

property view : None | bool

IcebergWarehouseInfoClass

class datahub.metadata.schema_classes.IcebergWarehouseInfoClass(dataRoot, clientId, clientSecret, region, env, role = None, tempCredentialExpirationSeconds = None)

Bases: _Aspect

An Iceberg warehouse location and credentials whose reads/writes are governed by the DataHub catalog.

  • Parameters:
    • dataRoot (str)
    • clientId (str)
    • clientSecret (str)
    • region (str)
    • env (Union[str, FabricTypeClass])
    • role (Optional[str])
    • tempCredentialExpirationSeconds (Optional[int])

property clientId : str

clientId to be used to authenticate with storage hosting this warehouse

property clientSecret : str

client secret to authenticate with storage hosting this warehouse

property dataRoot : str

Path of the root for the backing store of the tables in the warehouse.

property env : str | FabricTypeClass

property region : str

region where the warehouse is located.

property role : None | str

property tempCredentialExpirationSeconds : None | int

IconLibraryClass

class datahub.metadata.schema_classes.IconLibraryClass()

Bases: object

Enum of possible icon sources

MATERIAL = 'MATERIAL'

IconPropertiesClass

class datahub.metadata.schema_classes.IconPropertiesClass(iconLibrary, name, style)

Bases: DictWrapper

Properties describing an icon associated with an entity

property iconLibrary : str | IconLibraryClass

The source of the icon: e.g. Antd, Material, etc

property name : str

The name of the icon

property style : str

Any modifier for the icon, this will be library-specific, e.g. filled/outlined, etc

IncidentAssigneeClass

class datahub.metadata.schema_classes.IncidentAssigneeClass(actor, assignedAt)

Bases: DictWrapper

The incident assignee type. This is in a record so that we can add additional fields if we need to later (e.g. the type of the assignee).

property actor : str

The user or group assigned to the incident.

property assignedAt : AuditStampClass

The time & actor responsible for assigning the assignee.

IncidentInfoClass

class datahub.metadata.schema_classes.IncidentInfoClass(type, entities, status, created, customType = None, title = None, description = None, priority = None, assignees = None, source = None, startedAt = None)

Bases: _Aspect

Information about an incident raised on an asset.

property assignees : None | List[IncidentAssigneeClass]

The parties assigned with resolving the incident

property created : AuditStampClass

The time at which the request was initially created

property customType : None | str

An optional custom incident type. Present only if type is ‘CUSTOM’.

property description : None | str

Optional description associated with the incident

property entities : List[str]

A reference to the entity associated with the incident.

property priority : int | None

A numeric severity or priority for the incident. On the UI we will translate this into something easy to understand. Currently supported: 0 - CRITICAL, 1 - HIGH, 2 - MED, 3 - LOW (We probably should have modeled as an enum)

property source : None | IncidentSourceClass

The source of an incident, i.e. how it was generated.

property startedAt : None | int

The time at which the incident actually started (may be before the date it was raised).

property status : IncidentStatusClass

The current status of an incident, i.e. active or inactive.

property title : None | str

Optional title associated with the incident

property type : str | IncidentTypeClass

The type of incident

IncidentKeyClass

class datahub.metadata.schema_classes.IncidentKeyClass(id)

Bases: _Aspect

Key for an asset Incident

  • Parameters:id (str)

property id : str

A unique id for the incident. Generated on the server side at incident creation time.

IncidentSourceClass

class datahub.metadata.schema_classes.IncidentSourceClass(type, sourceUrn = None)

Bases: _Aspect

Information about the source of an incident raised on an asset.

property sourceUrn : None | str

Reference to a URN related to the source of an incident.

property type : str | IncidentSourceTypeClass

The type of the incident source, i.e. how the incident was generated

IncidentSourceTypeClass

class datahub.metadata.schema_classes.IncidentSourceTypeClass()

Bases: object

ASSERTION_FAILURE = 'ASSERTION_FAILURE'

MANUAL = 'MANUAL'

IncidentStageClass

class datahub.metadata.schema_classes.IncidentStageClass()

Bases: object

FIXED = 'FIXED'

INVESTIGATION = 'INVESTIGATION'

NO_ACTION_REQUIRED = 'NO_ACTION_REQUIRED'

TRIAGE = 'TRIAGE'

WORK_IN_PROGRESS = 'WORK_IN_PROGRESS'

IncidentStateClass

class datahub.metadata.schema_classes.IncidentStateClass()

Bases: object

ACTIVE = 'ACTIVE'

RESOLVED = 'RESOLVED'

IncidentStatusClass

class datahub.metadata.schema_classes.IncidentStatusClass(state, lastUpdated, stage = None, message = None)

Bases: DictWrapper

Information about an incident raised on an asset

property lastUpdated : AuditStampClass

The time at which the incident status was last updated

property message : None | str

Optional message associated with the incident

property stage : None | str | IncidentStageClass

The lifecycle stage for the incident - Null means no stage was assigned yet. In the future, we may add CUSTOM here with a customStage string field for user-defined stages.

property state : str | IncidentStateClass

The top-level state of the incident, whether it’s active or resolved.

IncidentSummaryDetailsClass

class datahub.metadata.schema_classes.IncidentSummaryDetailsClass(urn, type, createdAt, resolvedAt = None, priority = None)

Bases: DictWrapper

Summary statistics about incidents on an entity.

  • Parameters:
    • urn (str)
    • type (str)
    • createdAt (int)
    • resolvedAt (Optional[int])
    • priority (Optional[int])

property createdAt : int

The time at which the incident was raised in milliseconds since epoch.

property priority : None | int

The priority of the incident

property resolvedAt : None | int

The time at which the incident was marked as resolved in milliseconds since epoch. Null if the incident is still active.

property type : str

The type of an incident

property urn : str

The urn of the incident

IncidentTypeClass

class datahub.metadata.schema_classes.IncidentTypeClass()

Bases: object

A type of asset incident

CUSTOM = 'CUSTOM'

DATA_SCHEMA = 'DATA_SCHEMA'

FIELD = 'FIELD'

FRESHNESS = 'FRESHNESS'

OPERATIONAL = 'OPERATIONAL'

SQL = 'SQL'

VOLUME = 'VOLUME'

IncidentsSummaryClass

class datahub.metadata.schema_classes.IncidentsSummaryClass(resolvedIncidents = None, activeIncidents = None, resolvedIncidentDetails = None, activeIncidentDetails = None)

Bases: _Aspect

Summary of the incidents related to an entity.

property activeIncidentDetails : List[IncidentSummaryDetailsClass]

Summary details about the set of active incidents

property activeIncidents : List[str]

Active incidents for an asset. Deprecated! Use the richer activeIncidentDetails instead.

property resolvedIncidentDetails : List[IncidentSummaryDetailsClass]

Summary details about the set of resolved incidents

property resolvedIncidents : List[str]

Resolved incidents for an asset. Deprecated! Use the richer resolvedIncidentDetails instead.

IncrementingSegmentFieldTransformerClass

class datahub.metadata.schema_classes.IncrementingSegmentFieldTransformerClass(type, nativeType = None)

Bases: DictWrapper

The definition of the transformer function that should be applied to a given field / column value in a dataset in order to determine the segment or bucket that it belongs to, which in turn is used to evaluate volume assertions.

property nativeType : None | str

The ‘native’ transformer type, useful as a back door if a custom operator is required. This field is required if the type is NATIVE.

property type : str | IncrementingSegmentFieldTransformerTypeClass

A ‘standard’ transformer type. Note that not all source systems will support all operators.

IncrementingSegmentFieldTransformerTypeClass

class datahub.metadata.schema_classes.IncrementingSegmentFieldTransformerTypeClass()

Bases: object

CEILING = 'CEILING'

FLOOR = 'FLOOR'

NATIVE = 'NATIVE'

TIMESTAMP_MS_TO_DATE = 'TIMESTAMP_MS_TO_DATE'

TIMESTAMP_MS_TO_HOUR = 'TIMESTAMP_MS_TO_HOUR'

TIMESTAMP_MS_TO_MINUTE = 'TIMESTAMP_MS_TO_MINUTE'

TIMESTAMP_MS_TO_MONTH = 'TIMESTAMP_MS_TO_MONTH'

TIMESTAMP_MS_TO_YEAR = 'TIMESTAMP_MS_TO_YEAR'

IncrementingSegmentRowCountChangeClass

class datahub.metadata.schema_classes.IncrementingSegmentRowCountChangeClass(segment, type, operator, parameters)

Bases: DictWrapper

Attributes defining an INCREMENTING_SEGMENT_ROW_COUNT_CHANGE volume assertion.

property operator : str | AssertionStdOperatorClass

The operator you’d like to apply to the row count value

Note that only numeric operators are valid inputs: GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, BETWEEN.

property parameters : AssertionStdParametersClass

The parameters you’d like to provide as input to the operator.

Note that only numeric parameter types are valid inputs: NUMBER.

property segment : IncrementingSegmentSpecClass

A specification of how the ‘segment’ can be derived using a column and an optional transformer function.

property type : str | AssertionValueChangeTypeClass

The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage.

IncrementingSegmentRowCountTotalClass

class datahub.metadata.schema_classes.IncrementingSegmentRowCountTotalClass(segment, operator, parameters)

Bases: DictWrapper

Attributes defining an INCREMENTING_SEGMENT_ROW_COUNT_TOTAL volume assertion.

property operator : str | AssertionStdOperatorClass

The operator you’d like to apply.

Note that only numeric operators are valid inputs: GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, BETWEEN.

property parameters : AssertionStdParametersClass

The parameters you’d like to provide as input to the operator.

Note that only numeric parameter types are valid inputs: NUMBER.

property segment : IncrementingSegmentSpecClass

A specification of how the ‘segment’ can be derived using a column and an optional transformer function.

IncrementingSegmentSpecClass

class datahub.metadata.schema_classes.IncrementingSegmentSpecClass(field, transformer = None)

Bases: DictWrapper

Core attributes required to identify an incrementing segment in a table. This type is mainly useful for tables that constantly increase with new rows being added on a particular cadence (e.g. fact or event tables)

An incrementing segment represents a logical chunk of data which is INSERTED into a dataset on a regular interval, along with the presence of a constantly-incrementing column value such as an event time, date partition, or last modified column.

An incrementing segment is principally identified by 2 key attributes combined:

  1. A field or column that represents the incrementing value. New rows that are inserted will be identified using this column. Note that the value of this column may not by itself represent the “bucket” or the “segment” in which the row falls.
  2. [Optional] A transformer function that may be applied to the selected column value in order to obtain the final “segment identifier” or “bucket identifier”. Rows that have the same value after applying the transformation will be grouped into the same segment, using which the final value (e.g. row count) will be determined.

property field : SchemaFieldSpecClass

The field to use to generate segments. It must be constantly incrementing as new rows are inserted.

property transformer : None | IncrementingSegmentFieldTransformerClass

Optional transformer function to apply to the field in order to obtain the final segment or bucket identifier. If not provided, then no operator will be applied to the field. (identity function)

IngestionCheckpointStateClass

class datahub.metadata.schema_classes.IngestionCheckpointStateClass(formatVersion, serde, payload = None)

Bases: DictWrapper

The checkpoint state object of a datahub ingestion run for a given job.

  • Parameters:
    • formatVersion (str)
    • serde (str)
    • payload (Optional[bytes])

property formatVersion : str

The version of the state format.

property payload : None | bytes

Opaque blob of the state representation.

property serde : str

The serialization/deserialization protocol.

InputFieldClass

class datahub.metadata.schema_classes.InputFieldClass(schemaFieldUrn, schemaField = None)

Bases: DictWrapper

Information about a field a chart or dashboard references

property schemaField : None | SchemaFieldClass

Copied version of the referenced schema field object for indexing purposes

property schemaFieldUrn : str

Urn of the schema being referenced for lineage purposes

InputFieldsClass

class datahub.metadata.schema_classes.InputFieldsClass(fields)

Bases: _Aspect

Information about the fields a chart or dashboard references

property fields : List[InputFieldClass]

List of fields being referenced

InstitutionalMemoryClass

class datahub.metadata.schema_classes.InstitutionalMemoryClass(elements)

Bases: _Aspect

Institutional memory of an entity. This is a way to link to relevant documentation and provide description of the documentation. Institutional or tribal knowledge is very important for users to leverage the entity.

property elements : List[InstitutionalMemoryMetadataClass]

List of records that represent institutional memory of an entity. Each record consists of a link, description, creator and timestamps associated with that record.

InstitutionalMemoryMetadataClass

class datahub.metadata.schema_classes.InstitutionalMemoryMetadataClass(url, description, createStamp)

Bases: DictWrapper

Metadata corresponding to a record of institutional memory.

property createStamp : AuditStampClass

Audit stamp associated with creation of this record

property description : str

Description of the link.

property url : str

Link to an engineering design document or a wiki page.

IntendedUseClass

class datahub.metadata.schema_classes.IntendedUseClass(primaryUses = None, primaryUsers = None, outOfScopeUses = None)

Bases: _Aspect

Intended Use for the ML Model

  • Parameters:
    • primaryUses (Optional[List[str]])
    • primaryUsers (Optional[List[Union[str, IntendedUserTypeClass]]])
    • outOfScopeUses (Optional[List[str]])

property outOfScopeUses : None | List[str]

Highlight technology that the MLModel might easily be confused with, or related contexts that users could try to apply the MLModel to.

property primaryUsers : None | List[str | IntendedUserTypeClass]

Primary Intended Users - For example, was the MLModel developed for entertainment purposes, for hobbyists, or enterprise solutions?

property primaryUses : None | List[str]

Primary Use cases for the MLModel.

IntendedUserTypeClass

class datahub.metadata.schema_classes.IntendedUserTypeClass()

Bases: object

ENTERPRISE = 'ENTERPRISE'

ENTERTAINMENT = 'ENTERTAINMENT'

HOBBY = 'HOBBY'

InviteTokenClass

class datahub.metadata.schema_classes.InviteTokenClass(token, role = None)

Bases: _Aspect

Aspect used to store invite tokens.

  • Parameters:
    • token (str)
    • role (Optional[str])

property role : None | str

The role that this invite token may be associated with

property token : str

The encrypted invite token.

InviteTokenKeyClass

class datahub.metadata.schema_classes.InviteTokenKeyClass(id)

Bases: _Aspect

Key for an InviteToken.

  • Parameters:id (str)

property id : str

A unique id for the invite token.

JobStatusClass

class datahub.metadata.schema_classes.JobStatusClass()

Bases: object

Job statuses

COMPLETED = 'COMPLETED'

FAILED = 'FAILED'

IN_PROGRESS = 'IN_PROGRESS'

SKIPPED = 'SKIPPED'

STARTING = 'STARTING'

STOPPED = 'STOPPED'

STOPPING = 'STOPPING'

UNKNOWN = 'UNKNOWN'

KafkaAuditHeaderClass

class datahub.metadata.schema_classes.KafkaAuditHeaderClass(time, server, appName, messageId, instance = None, auditVersion = None, fabricUrn = None, clusterConnectionString = None)

Bases: DictWrapper

This header records information about the context of an event as it is emitted into kafka and is intended to be used by the kafka audit application. For more information see go/kafkaauditheader

  • Parameters:
    • time (int)
    • server (str)
    • appName (str)
    • messageId (bytes)
    • instance (Optional[str])
    • auditVersion (Optional[int])
    • fabricUrn (Optional[str])
    • clusterConnectionString (Optional[str])

property appName : str

The name of the application from which the event is being emitted. see go/appname

property auditVersion : None | int

The version that is being used for auditing. In version 0, the audit trail buckets events into 10 minute audit windows based on the EventHeader timestamp. In version 1, the audit trail buckets events as follows: if the schema has an outer KafkaAuditHeader, use the outer audit header timestamp for bucketing; else if the EventHeader has an inner KafkaAuditHeader, use that inner audit header’s timestamp for bucketing.

property clusterConnectionString : None | str

This is a String that the client uses to establish some kind of connection with the Kafka cluster. The exact format of it depends on specific versions of clients and brokers. This information could potentially identify the fabric and cluster with which the client is producing to or consuming from.

property fabricUrn : None | str

The fabricUrn of the host from which the event is being emitted. Fabric Urn in the format of urn:li:fabric:{fabric_name}. See go/fabric.

property instance : None | str

The instance on the server from which the event is being emitted. e.g. i001

property messageId : bytes

A unique identifier for the message

property server : str

The fully qualified name of the host from which the event is being emitted.

property time : int

The time at which the event was emitted into kafka.

KafkaSchemaClass

class datahub.metadata.schema_classes.KafkaSchemaClass(documentSchema, documentSchemaType = None, keySchema = None, keySchemaType = None)

Bases: DictWrapper

Schema holder for kafka schema.

  • Parameters:
    • documentSchema (str)
    • documentSchemaType (Optional[str])
    • keySchema (Optional[str])
    • keySchemaType (Optional[str])

property documentSchema : str

The native kafka document schema. This is a human readable avro document schema.

property documentSchemaType : None | str

The native kafka document schema type. This can be AVRO/PROTOBUF/JSON.

property keySchema : None | str

The native kafka key schema as retrieved from Schema Registry

property keySchemaType : None | str

The native kafka key schema type. This can be AVRO/PROTOBUF/JSON.

KeyValueSchemaClass

class datahub.metadata.schema_classes.KeyValueSchemaClass(keySchema, valueSchema)

Bases: DictWrapper

Schema text of a key-value store schema.

  • Parameters:
    • keySchema (str)
    • valueSchema (str)

property keySchema : str

The raw schema for the key in the key-value store.

property valueSchema : str

The raw schema for the value in the key-value store.

MLFeatureDataTypeClass

class datahub.metadata.schema_classes.MLFeatureDataTypeClass()

Bases: object

MLFeature Data Type

AUDIO = 'AUDIO'

BINARY = 'BINARY'

BYTE = 'BYTE'

CONTINUOUS = 'CONTINUOUS'

COUNT = 'COUNT'

IMAGE = 'IMAGE'

INTERVAL = 'INTERVAL'

MAP = 'MAP'

NOMINAL = 'NOMINAL'

ORDINAL = 'ORDINAL'

SEQUENCE = 'SEQUENCE'

SET = 'SET'

TEXT = 'TEXT'

TIME = 'TIME'

UNKNOWN = 'UNKNOWN'

USELESS = 'USELESS'

VIDEO = 'VIDEO'

MLFeatureKeyClass

class datahub.metadata.schema_classes.MLFeatureKeyClass(featureNamespace, name)

Bases: _Aspect

Key for an MLFeature

  • Parameters:
    • featureNamespace (str)
    • name (str)

property featureNamespace : str

Namespace for the feature

property name : str

Name of the feature

MLFeaturePropertiesClass

class datahub.metadata.schema_classes.MLFeaturePropertiesClass(customProperties = None, description = None, dataType = None, version = None, sources = None)

Bases: _Aspect

Properties associated with a MLFeature

  • Parameters:
    • customProperties (Optional[Dict[str, str]])
    • description (Optional[str])
    • dataType (Union[None, str, MLFeatureDataTypeClass]) –
    • version (Optional[VersionTagClass]) –
    • sources (Optional[List[str]])

property customProperties : Dict[str, str]

Custom property bag.

property dataType : None | str | MLFeatureDataTypeClass

Data Type of the MLFeature

property description : None | str

Documentation of the MLFeature

property sources : None | List[str]

Source of the MLFeature

property version : None | VersionTagClass

Version of the MLFeature

MLFeatureSnapshotClass

class datahub.metadata.schema_classes.MLFeatureSnapshotClass(urn, aspects)

Bases: DictWrapper

property aspects : List[MLFeatureKeyClass | MLFeaturePropertiesClass | OwnershipClass | InstitutionalMemoryClass | StatusClass | DeprecationClass | BrowsePathsClass | GlobalTagsClass | DataPlatformInstanceClass | BrowsePathsV2Class]

The list of metadata aspects associated with the MLFeature. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

MLFeatureTableKeyClass

class datahub.metadata.schema_classes.MLFeatureTableKeyClass(platform, name)

Bases: _Aspect

Key for an MLFeatureTable

  • Parameters:
    • platform (str)
    • name (str)

property name : str

Name of the feature table

property platform : str

Data platform urn associated with the feature table

MLFeatureTablePropertiesClass

class datahub.metadata.schema_classes.MLFeatureTablePropertiesClass(customProperties = None, description = None, mlFeatures = None, mlPrimaryKeys = None)

Bases: _Aspect

Properties associated with a MLFeatureTable

  • Parameters:
    • customProperties (Optional[Dict[str, str]])
    • description (Optional[str])
    • mlFeatures (Optional[List[str]])
    • mlPrimaryKeys (Optional[List[str]])

property customProperties : Dict[str, str]

Custom property bag.

property description : None | str

Documentation of the MLFeatureTable

property mlFeatures : None | List[str]

List of features contained in the feature table

property mlPrimaryKeys : None | List[str]

List of primary keys in the feature table (if multiple, assumed to act as a composite key)

MLFeatureTableSnapshotClass

class datahub.metadata.schema_classes.MLFeatureTableSnapshotClass(urn, aspects)

Bases: DictWrapper

property aspects : List[MLFeatureTableKeyClass | MLFeatureTablePropertiesClass | OwnershipClass | InstitutionalMemoryClass | StatusClass | DeprecationClass | BrowsePathsClass | GlobalTagsClass | DataPlatformInstanceClass | BrowsePathsV2Class]

The list of metadata aspects associated with the MLFeatureTable. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

MLHyperParamClass

class datahub.metadata.schema_classes.MLHyperParamClass(name, description = None, value = None, createdAt = None)

Bases: _Aspect

Properties associated with an ML Hyper Param

  • Parameters:
    • name (str)
    • description (Optional[str])
    • value (Optional[str])
    • createdAt (Optional[int])

property createdAt : None | int

Date when the MLHyperParam was developed

property description : None | str

Documentation of the MLHyperParam

property name : str

Name of the MLHyperParam

property value : None | str

The value of the MLHyperParam

MLMetricClass

class datahub.metadata.schema_classes.MLMetricClass(name, description = None, value = None, createdAt = None)

Bases: _Aspect

Properties associated with an ML Metric

  • Parameters:
    • name (str)
    • description (Optional[str])
    • value (Optional[str])
    • createdAt (Optional[int])

property createdAt : None | int

Date when the mlMetric was developed

property description : None | str

Documentation of the mlMetric

property name : str

Name of the mlMetric

property value : None | str

The value of the mlMetric

MLModelDeploymentKeyClass

class datahub.metadata.schema_classes.MLModelDeploymentKeyClass(platform, name, origin)

Bases: _Aspect

Key for an ML model deployment

  • Parameters:
    • platform (str)
    • name (str)
    • origin (Union[str, FabricTypeClass])

property name : str

Name of the MLModelDeployment

property origin : str | FabricTypeClass

Fabric type where model Deployment belongs to or where it was generated

property platform : str

Standardized platform urn for the model Deployment

MLModelDeploymentPropertiesClass

class datahub.metadata.schema_classes.MLModelDeploymentPropertiesClass(customProperties = None, externalUrl = None, description = None, createdAt = None, version = None, status = None)

Bases: _Aspect

Properties associated with an ML Model Deployment

  • Parameters:
    • customProperties (Optional[Dict[str, str]])
    • externalUrl (Optional[str])
    • description (Optional[str])
    • createdAt (Optional[int])
    • version (Optional[VersionTagClass]) –
    • status (Union[None, str, DeploymentStatusClass]) –

property createdAt : None | int

Date when the MLModelDeployment was developed

property customProperties : Dict[str, str]

Custom property bag.

property description : None | str

Documentation of the MLModelDeployment

property externalUrl : None | str

URL where the reference exists

property status : None | str | DeploymentStatusClass

Status of the deployment

property version : None | VersionTagClass

Version of the MLModelDeployment

MLModelDeploymentSnapshotClass

class datahub.metadata.schema_classes.MLModelDeploymentSnapshotClass(urn, aspects)

Bases: DictWrapper

property aspects : List[MLModelDeploymentKeyClass | MLModelDeploymentPropertiesClass | OwnershipClass | StatusClass | DeprecationClass | GlobalTagsClass | DataPlatformInstanceClass]

The list of metadata aspects associated with the MLModelDeployment. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

MLModelFactorPromptsClass

class datahub.metadata.schema_classes.MLModelFactorPromptsClass(relevantFactors = None, evaluationFactors = None)

Bases: _Aspect

Prompts which affect the performance of the MLModel

property evaluationFactors : None | List[MLModelFactorsClass]

Which factors are being reported, and why were these chosen?

property relevantFactors : None | List[MLModelFactorsClass]

What are foreseeable salient factors for which MLModel performance may vary, and how were these determined?

MLModelFactorsClass

class datahub.metadata.schema_classes.MLModelFactorsClass(groups = None, instrumentation = None, environment = None)

Bases: DictWrapper

Factors affecting the performance of the MLModel.

  • Parameters:
    • groups (Optional[List[str]])
    • instrumentation (Optional[List[str]])
    • environment (Optional[List[str]])

property environment : None | List[str]

A further factor affecting MLModel performance is the environment in which it is deployed.

property groups : None | List[str]

Groups refers to distinct categories with similar characteristics that are present in the evaluation data instances. For human-centric machine learning MLModels, groups are people who share one or multiple characteristics.

property instrumentation : None | List[str]

The performance of a MLModel can vary depending on what instruments were used to capture the input to the MLModel. For example, a face detection model may perform differently depending on the camera’s hardware and software, including lens, image stabilization, high dynamic range techniques, and background blurring for portrait mode.

MLModelGroupKeyClass

class datahub.metadata.schema_classes.MLModelGroupKeyClass(platform, name, origin)

Bases: _Aspect

Key for an ML model group

  • Parameters:
    • platform (str)
    • name (str)
    • origin (Union[str, FabricTypeClass])

property name : str

Name of the MLModelGroup

property origin : str | FabricTypeClass

Fabric type where model group belongs to or where it was generated

property platform : str

Standardized platform urn for the model group

MLModelGroupPropertiesClass

class datahub.metadata.schema_classes.MLModelGroupPropertiesClass(customProperties = None, trainingJobs = None, downstreamJobs = None, externalUrl = None, name = None, description = None, createdAt = None, created = None, lastModified = None, version = None)

Bases: _Aspect

Properties associated with an ML Model Group

  • Parameters:
    • customProperties (Optional[Dict[str, str]])
    • trainingJobs (Optional[List[str]])
    • downstreamJobs (Optional[List[str]])
    • externalUrl (Optional[str])
    • name (Optional[str])
    • description (Optional[str])
    • createdAt (Optional[int])
    • created (Optional[TimeStampClass]) –
    • lastModified (Optional[TimeStampClass]) –
    • version (Optional[VersionTagClass]) –

property created : None | TimeStampClass

Time and Actor who created the MLModelGroup

property createdAt : None | int

Date when the MLModelGroup was developed

property customProperties : Dict[str, str]

Custom property bag.

property description : None | str

Documentation of the MLModelGroup

property downstreamJobs : None | List[str]

List of jobs or process instances (if any) that use the model or group.

property externalUrl : None | str

URL where the reference exists

property lastModified : None | TimeStampClass

Date when the MLModelGroup was last modified

property name : None | str

Display name of the MLModelGroup

property trainingJobs : None | List[str]

List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.

property version : None | VersionTagClass

Version of the MLModelGroup

MLModelGroupSnapshotClass

class datahub.metadata.schema_classes.MLModelGroupSnapshotClass(urn, aspects)

Bases: DictWrapper

property aspects : List[MLModelGroupKeyClass | MLModelGroupPropertiesClass | OwnershipClass | StatusClass | DeprecationClass | BrowsePathsClass | GlobalTagsClass | DataPlatformInstanceClass | BrowsePathsV2Class]

The list of metadata aspects associated with the MLModelGroup. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

MLModelKeyClass

class datahub.metadata.schema_classes.MLModelKeyClass(platform, name, origin)

Bases: _Aspect

Key for an ML model

  • Parameters:
    • platform (str)
    • name (str)
    • origin (Union[str, FabricTypeClass])

property name : str

Name of the MLModel

property origin : str | FabricTypeClass

Fabric type where model belongs to or where it was generated

property platform : str

Standardized platform urn for the model

MLModelPropertiesClass

class datahub.metadata.schema_classes.MLModelPropertiesClass(customProperties = None, externalUrl = None, trainingJobs = None, downstreamJobs = None, name = None, description = None, date = None, created = None, lastModified = None, version = None, type = None, hyperParameters = None, hyperParams = None, trainingMetrics = None, onlineMetrics = None, mlFeatures = None, tags = None, deployments = None, groups = None)

Bases: _Aspect

Properties associated with a ML Model

  • Parameters:
    • customProperties (Optional[Dict[str, str]])
    • externalUrl (Optional[str])
    • trainingJobs (Optional[List[str]])
    • downstreamJobs (Optional[List[str]])
    • name (Optional[str])
    • description (Optional[str])
    • date (Optional[int])
    • created (Optional[TimeStampClass]) –
    • lastModified (Optional[TimeStampClass]) –
    • version (Optional[VersionTagClass]) –
    • type (Optional[str])
    • hyperParameters (Optional[Dict[str, Union[str, int, float, bool]]])
    • hyperParams (Optional[List[MLHyperParamClass]]) –
    • trainingMetrics (Optional[List[MLMetricClass]]) –
    • onlineMetrics (Optional[List[MLMetricClass]]) –
    • mlFeatures (Optional[List[str]])
    • tags (Optional[List[str]])
    • deployments (Optional[List[str]])
    • groups (Optional[List[str]])

property created : None | TimeStampClass

Audit stamp containing who created this and when

property customProperties : Dict[str, str]

Custom property bag.

property date : None | int

Date when the MLModel was developed

property deployments : None | List[str]

Deployments for the MLModel

property description : None | str

Documentation of the MLModel

property downstreamJobs : None | List[str]

List of jobs or process instances (if any) that use the model or group.

property externalUrl : None | str

URL where the reference exists

property groups : None | List[str]

Groups the model belongs to

property hyperParameters : None | Dict[str, str | int | float | bool]

Hyper Parameters of the MLModel

NOTE: these are deprecated in favor of hyperParams

property hyperParams : None | List[MLHyperParamClass]

Hyperparameters of the MLModel

property lastModified : None | TimeStampClass

Date when the MLModel was last modified

property mlFeatures : None | List[str]

List of features used for MLModel training

property name : None | str

Display name of the MLModel

property onlineMetrics : None | List[MLMetricClass]

Metrics of the MLModel used in production

property tags : List[str]

Tags for the MLModel

property trainingJobs : None | List[str]

List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.

property trainingMetrics : None | List[MLMetricClass]

Metrics of the MLModel used in training

property type : None | str

Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc

property version : None | VersionTagClass

Version of the MLModel

MLModelSnapshotClass

class datahub.metadata.schema_classes.MLModelSnapshotClass(urn, aspects)

Bases: DictWrapper

MLModel Snapshot entity details.

property aspects : List[MLModelKeyClass | OwnershipClass | MLModelPropertiesClass | IntendedUseClass | MLModelFactorPromptsClass | MetricsClass | EvaluationDataClass | TrainingDataClass | QuantitativeAnalysesClass | EthicalConsiderationsClass | CaveatsAndRecommendationsClass | InstitutionalMemoryClass | SourceCodeClass | StatusClass | CostClass | DeprecationClass | BrowsePathsClass | GlobalTagsClass | DataPlatformInstanceClass | BrowsePathsV2Class]

The list of metadata aspects associated with the MLModel. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

MLPrimaryKeyKeyClass

class datahub.metadata.schema_classes.MLPrimaryKeyKeyClass(featureNamespace, name)

Bases: _Aspect

Key for an MLPrimaryKey

  • Parameters:
    • featureNamespace (str)
    • name (str)

property featureNamespace : str

Namespace for the primary key

property name : str

Name of the primary key

MLPrimaryKeyPropertiesClass

class datahub.metadata.schema_classes.MLPrimaryKeyPropertiesClass(sources, customProperties = None, description = None, dataType = None, version = None)

Bases: _Aspect

Properties associated with a MLPrimaryKey

  • Parameters:
    • sources (List[str])
    • customProperties (Optional[Dict[str, str]])
    • description (Optional[str])
    • dataType (Union[None, str, MLFeatureDataTypeClass]) –
    • version (Optional[VersionTagClass]) –

property customProperties : Dict[str, str]

Custom property bag.

property dataType : None | str | MLFeatureDataTypeClass

Data Type of the MLPrimaryKey

property description : None | str

Documentation of the MLPrimaryKey

property sources : List[str]

Source of the MLPrimaryKey

property version : None | VersionTagClass

Version of the MLPrimaryKey

MLPrimaryKeySnapshotClass

class datahub.metadata.schema_classes.MLPrimaryKeySnapshotClass(urn, aspects)

Bases: DictWrapper

property aspects : List[MLPrimaryKeyKeyClass | MLPrimaryKeyPropertiesClass | OwnershipClass | InstitutionalMemoryClass | StatusClass | DeprecationClass | GlobalTagsClass | DataPlatformInstanceClass]

The list of metadata aspects associated with the MLPrimaryKey. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

MLTrainingRunPropertiesClass

class datahub.metadata.schema_classes.MLTrainingRunPropertiesClass(customProperties = None, externalUrl = None, id = None, outputUrls = None, hyperParams = None, trainingMetrics = None)

Bases: _Aspect

The inputs and outputs of this training run

  • Parameters:
    • customProperties (Optional[Dict[str, str]])
    • externalUrl (Optional[str])
    • id (Optional[str])
    • outputUrls (Optional[List[str]])
    • hyperParams (Optional[List[MLHyperParamClass]]) –
    • trainingMetrics (Optional[List[MLMetricClass]]) –

property customProperties : Dict[str, str]

Custom property bag.

property externalUrl : None | str

URL where the reference exists

property hyperParams : None | List[MLHyperParamClass]

Hyperparameters of the ML Training Run

property id : None | str

Run Id of the ML Training Run

property outputUrls : None | List[str]

List of URLs for the Outputs of the ML Training Run

property trainingMetrics : None | List[MLMetricClass]

Metrics of the ML Training Run

MapTypeClass

class datahub.metadata.schema_classes.MapTypeClass(keyType = None, valueType = None)

Bases: DictWrapper

Map field type.

  • Parameters:
    • keyType (Optional[str])
    • valueType (Optional[str])

property keyType : None | str

Key type in a map

property valueType : None | str

Type of the value in a map

MediaClass

class datahub.metadata.schema_classes.MediaClass(type, location)

Bases: DictWrapper

Carries information about a piece of media content: its type and where it is stored.

property location : str

Where the media content is stored.

property type : str | MediaTypeClass

Type of content the Media is storing, e.g. image, video, etc.

MediaTypeClass

class datahub.metadata.schema_classes.MediaTypeClass()

Bases: object

Enum defining the type of content a Media object holds.

IMAGE = 'IMAGE'

MetadataAttributionClass

class datahub.metadata.schema_classes.MetadataAttributionClass(time, actor, source = None, sourceDetail = None)

Bases: DictWrapper

Information about who, why, and how this metadata was applied

  • Parameters:
    • time (int)
    • actor (str)
    • source (Optional[str])
    • sourceDetail (Optional[Dict[str, str]])

property actor : str

The entity (e.g. a member URN) responsible for applying the associated metadata. This can either be a user (in case of UI edits) or the datahub system for automation.

property source : None | str

The DataHub source responsible for applying the associated metadata. This will only be filled out when a DataHub source is responsible. This includes the specific metadata test urn, the automation urn.

property sourceDetail : Dict[str, str]

The details associated with why this metadata was applied. For example, this could include the actual regex rule, sql statement, ingestion pipeline ID, etc.

property time : int

When this metadata was updated.

MetadataChangeEventClass

class datahub.metadata.schema_classes.MetadataChangeEventClass(proposedSnapshot, auditHeader = None, proposedDelta = None, systemMetadata = None)

Bases: DictWrapper

Kafka event for proposing a metadata change for an entity. A corresponding MetadataAuditEvent is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeEvent will be emitted instead.

property auditHeader : None | KafkaAuditHeaderClass

Kafka audit header. See go/kafkaauditheader for more info.

property proposedDelta : None

Delta of the proposed metadata partial update.

property proposedSnapshot : ChartSnapshotClass | CorpGroupSnapshotClass | CorpUserSnapshotClass | DashboardSnapshotClass | DataFlowSnapshotClass | DataJobSnapshotClass | DatasetSnapshotClass | DataProcessSnapshotClass | DataPlatformSnapshotClass | MLModelSnapshotClass | MLPrimaryKeySnapshotClass | MLFeatureSnapshotClass | MLFeatureTableSnapshotClass | MLModelDeploymentSnapshotClass | MLModelGroupSnapshotClass | TagSnapshotClass | GlossaryTermSnapshotClass | GlossaryNodeSnapshotClass | DataHubPolicySnapshotClass | SchemaFieldSnapshotClass | DataHubRetentionSnapshotClass

Snapshot of the proposed metadata change. Include only the aspects affected by the change in the snapshot.

property systemMetadata : None | SystemMetadataClass

Metadata around how the snapshot was ingested

MetadataChangeLogClass

class datahub.metadata.schema_classes.MetadataChangeLogClass(entityType, changeType, auditHeader = None, entityUrn = None, entityKeyAspect = None, aspectName = None, aspect = None, systemMetadata = None, headers = None, previousAspectValue = None, previousSystemMetadata = None, created = None)

Bases: DictWrapper

Kafka event for capturing update made to an entity’s metadata.

property aspect : None | GenericAspectClass

The value of the new aspect.

property aspectName : None | str

Aspect of the entity being written to. Not filling this out implies that the writer wants to affect the entire entity. Note: This is only valid for CREATE, UPSERT, and DELETE operations.

property auditHeader : None | KafkaAuditHeaderClass

Kafka audit header. Currently remains unused in the open source.

property changeType : str | ChangeTypeClass

Type of change being proposed

property created : None | AuditStampClass

An audit stamp detailing who and when the aspect was changed by. Required for all intents and purposes.

property entityKeyAspect : None | GenericAspectClass

Key aspect of the entity being written

property entityType : str

Type of the entity being written to

property entityUrn : None | str

Urn of the entity being written

property headers : None | Dict[str, str]

Headers - intended to mimic http headers

property previousAspectValue : None | GenericAspectClass

The previous value of the aspect that has changed.

property previousSystemMetadata : None | SystemMetadataClass

The previous value of the system metadata field that has changed.

property systemMetadata : None | SystemMetadataClass

System properties that one might want to attach to an event

MetadataChangeProposalClass

class datahub.metadata.schema_classes.MetadataChangeProposalClass(entityType, changeType, auditHeader = None, entityUrn = None, entityKeyAspect = None, aspectName = None, aspect = None, systemMetadata = None, headers = None)

Bases: DictWrapper

Kafka event for proposing a metadata change for an entity. A corresponding MetadataChangeLog is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeProposal will be emitted instead.

property aspect : None | GenericAspectClass

The value of the new aspect.

property aspectName : None | str

Aspect of the entity being written to. Not filling this out implies that the writer wants to affect the entire entity. Note: This is only valid for CREATE, UPSERT, and DELETE operations.

property auditHeader : None | KafkaAuditHeaderClass

Kafka audit header. Currently remains unused in the open source.

property changeType : str | ChangeTypeClass

Type of change being proposed

property entityKeyAspect : None | GenericAspectClass

Key aspect of the entity being written

property entityType : str

Type of the entity being written to

property entityUrn : None | str

Urn of the entity being written

property headers : None | Dict[str, str]

Headers - intended to mimic http headers

property systemMetadata : None | SystemMetadataClass

System properties that one might want to attach to an event

MetricsClass

class datahub.metadata.schema_classes.MetricsClass(performanceMeasures = None, decisionThreshold = None)

Bases: _Aspect

Metrics to be featured for the MLModel.

  • Parameters:
    • performanceMeasures (Optional[List[str]])
    • decisionThreshold (Optional[List[str]])

property decisionThreshold : None | List[str]

Decision Thresholds used (if any)?

property performanceMeasures : None | List[str]

Measures of MLModel performance

MySqlDDLClass

class datahub.metadata.schema_classes.MySqlDDLClass(tableSchema)

Bases: DictWrapper

Schema holder for MySql data definition language that describes an MySql table.

  • Parameters:tableSchema (str)

property tableSchema : str

The native schema in the dataset’s platform. This is a human readable (json blob) table schema.

NativeGroupMembershipClass

class datahub.metadata.schema_classes.NativeGroupMembershipClass(nativeGroups)

Bases: _Aspect

Carries information about the native CorpGroups a user is in.

  • Parameters:nativeGroups (List[str])

property nativeGroups : List[str]

NotebookCellClass

class datahub.metadata.schema_classes.NotebookCellClass(type, textCell = None, queryCell = None, chartCell = None)

Bases: DictWrapper

A record of all supported cells for a Notebook. Only one type of cell will be non-null.

property chartCell : None | ChartCellClass

The chart cell content. This will be non-null only when all other cell fields are null.

property queryCell : None | QueryCellClass

The query cell content. This will be non-null only when all other cell fields are null.

property textCell : None | TextCellClass

The text cell content. This will be non-null only when all other cell fields are null.

property type : str | NotebookCellTypeClass

The type of this Notebook cell

NotebookCellTypeClass

class datahub.metadata.schema_classes.NotebookCellTypeClass()

Bases: object

Type of Notebook Cell

CHART_CELL = 'CHART_CELL'

QUERY_CELL = 'QUERY_CELL'

TEXT_CELL = 'TEXT_CELL'

NotebookContentClass

class datahub.metadata.schema_classes.NotebookContentClass(cells = None)

Bases: _Aspect

Content in a Notebook. Note: This is in BETA version.

property cells : List[NotebookCellClass]

The content of a Notebook which is composed by a list of NotebookCell

NotebookInfoClass

class datahub.metadata.schema_classes.NotebookInfoClass(title, changeAuditStamps, customProperties = None, externalUrl = None, description = None)

Bases: _Aspect

Information about a Notebook. Note: This is in BETA version.

  • Parameters:
    • title (str)
    • changeAuditStamps (ChangeAuditStampsClass) –
    • customProperties (Optional[Dict[str, str]])
    • externalUrl (Optional[str])
    • description (Optional[str])

property changeAuditStamps : ChangeAuditStampsClass

Captures information about who created/last modified/deleted this Notebook and when

property customProperties : Dict[str, str]

Custom property bag.

property description : None | str

Detailed description about the Notebook

property externalUrl : None | str

URL where the reference exists

property title : str

Title of the Notebook

NotebookKeyClass

class datahub.metadata.schema_classes.NotebookKeyClass(notebookTool, notebookId)

Bases: _Aspect

Key for a Notebook

  • Parameters:
    • notebookTool (str)
    • notebookId (str)

property notebookId : str

Unique id for the Notebook. This id should be globally unique for a Notebook tool even when there are multiple deployments of it. As an example, Notebook URL could be used here for QueryBook such as ‘querybook.com/notebook/773’

property notebookTool : str

The name of the Notebook tool such as QueryBook, etc.

NotificationSettingsClass

class datahub.metadata.schema_classes.NotificationSettingsClass(sinkTypes, slackSettings = None, emailSettings = None)

Bases: DictWrapper

Notification settings for an actor or subscription.

property emailSettings : None | EmailNotificationSettingsClass

Email Notification Settings

property sinkTypes : List[str | NotificationSinkTypeClass]

Sink types that notifications are sent to.

property slackSettings : None | SlackNotificationSettingsClass

Slack Notification Settings

NotificationSinkTypeClass

class datahub.metadata.schema_classes.NotificationSinkTypeClass()

Bases: object

A type of sink / platform to send a notification to.

EMAIL = 'EMAIL'

SLACK = 'SLACK'

NullTypeClass

class datahub.metadata.schema_classes.NullTypeClass()

Bases: DictWrapper

Null field type.

NumberTypeClass

class datahub.metadata.schema_classes.NumberTypeClass()

Bases: DictWrapper

Number data type: long, integer, short, etc.

OidcSettingsClass

class datahub.metadata.schema_classes.OidcSettingsClass(enabled, clientId, clientSecret, discoveryUri, userNameClaim = None, userNameClaimRegex = None, scope = None, clientAuthenticationMethod = None, jitProvisioningEnabled = None, preProvisioningRequired = None, extractGroupsEnabled = None, groupsClaim = None, responseType = None, responseMode = None, useNonce = None, readTimeout = None, extractJwtAccessTokenClaims = None, preferredJwsAlgorithm = None, preferredJwsAlgorithm2 = None)

Bases: DictWrapper

Settings for OIDC SSO integration.

  • Parameters:
    • enabled (bool)
    • clientId (str)
    • clientSecret (str)
    • discoveryUri (str)
    • userNameClaim (Optional[str])
    • userNameClaimRegex (Optional[str])
    • scope (Optional[str])
    • clientAuthenticationMethod (Optional[str])
    • jitProvisioningEnabled (Optional[bool])
    • preProvisioningRequired (Optional[bool])
    • extractGroupsEnabled (Optional[bool])
    • groupsClaim (Optional[str])
    • responseType (Optional[str])
    • responseMode (Optional[str])
    • useNonce (Optional[bool])
    • readTimeout (Optional[int])
    • extractJwtAccessTokenClaims (Optional[bool])
    • preferredJwsAlgorithm (Optional[str])
    • preferredJwsAlgorithm2 (Optional[str])

property clientAuthenticationMethod : None | str

ADVANCED. Which authentication method to use to pass credentials (clientId and clientSecret) to the token endpoint. Defaults to “client_secret_basic”.

property clientId : str

Unique client id issued by the identity provider.

property clientSecret : str

Unique client secret issued by the identity provider.

property discoveryUri : str

The IdP OIDC discovery url.

property enabled : bool

Whether OIDC SSO is enabled.

property extractGroupsEnabled : None | bool

ADVANCED. Whether groups should be extracted from a claim in the OIDC profile. Only applies if JIT provisioning is enabled. Groups will be created if they do not exist. Defaults to true.

property extractJwtAccessTokenClaims : None | bool

ADVANCED. Whether to extract claims from JWT access token. Defaults to false.

property groupsClaim : None | str

ADVANCED. The OIDC claim to extract groups information from. Defaults to ‘groups’.

property jitProvisioningEnabled : None | bool

ADVANCED. Whether DataHub users should be provisioned on login if they do not exist. Defaults to true.

property preProvisioningRequired : None | bool

ADVANCED. Whether the user should already exist in DataHub on login, failing login if they are not. Defaults to false.

property preferredJwsAlgorithm : None | str

ADVANCED. Which jws algorithm to use. Unused.

property preferredJwsAlgorithm2 : None | str

ADVANCED. Which jws algorithm to use.

property readTimeout : None | int

ADVANCED. Read timeout.

property responseMode : None | str

ADVANCED. Response mode.

property responseType : None | str

ADVANCED. Response type.

property scope : None | str

ADVANCED. String representing the requested scope from the IdP. Defaults to “oidc email profile”.

property useNonce : None | bool

ADVANCED. Use Nonce.

property userNameClaim : None | str

ADVANCED. The attribute / claim used to derive the DataHub username. Defaults to “preferred_username”.

property userNameClaimRegex : None | str

ADVANCED. The regex used to parse the DataHub username from the user name claim. Defaults to (.*) (all).

OperationClass

class datahub.metadata.schema_classes.OperationClass(timestampMillis, operationType, lastUpdatedTimestamp, eventGranularity = None, partitionSpec = None, messageId = None, actor = None, customOperationType = None, numAffectedRows = None, affectedDatasets = None, sourceType = None, customProperties = None, queries = None)

Bases: _Aspect

Operational info for an entity.

  • Parameters:
    • timestampMillis (int)
    • operationType (Union[str, OperationTypeClass]) –
    • lastUpdatedTimestamp (int)
    • eventGranularity (Optional[TimeWindowSizeClass]) –
    • partitionSpec (Optional[PartitionSpecClass]) –
    • messageId (Optional[str])
    • actor (Optional[str])
    • customOperationType (Optional[str])
    • numAffectedRows (Optional[int])
    • affectedDatasets (Optional[List[str]])
    • sourceType (Union[None, str, OperationSourceTypeClass]) –
    • customProperties (Optional[Dict[str, str]])
    • queries (Optional[List[str]])

ASPECT_TYPE : ClassVar[str] = 'timeseries'

property actor : None | str

Actor who issued this operation.

property affectedDatasets : None | List[str]

Which other datasets were affected by this operation.

property customOperationType : None | str

A custom type of operation. Required if operationType is CUSTOM.

property customProperties : None | Dict[str, str]

Custom properties

property eventGranularity : None | TimeWindowSizeClass

Granularity of the event if applicable

property lastUpdatedTimestamp : int

The time at which the operation occurred. Would be better named ‘operationTime’

property messageId : None | str

The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value.

property numAffectedRows : None | int

How many rows were affected by this operation.

property operationType : str | OperationTypeClass

Operation type of change.

property partitionSpec : PartitionSpecClass | None

The optional partition specification.

property queries : None | List[str]

Which queries were used in this operation.

property sourceType : None | str | OperationSourceTypeClass

Source Type

property timestampMillis : int

The event timestamp field as epoch at UTC in milli seconds.

OperationSourceTypeClass

class datahub.metadata.schema_classes.OperationSourceTypeClass()

Bases: object

The source of an operation

DATA_PLATFORM = 'DATA_PLATFORM'

DATA_PROCESS = 'DATA_PROCESS'

OperationTypeClass

class datahub.metadata.schema_classes.OperationTypeClass()

Bases: object

Enum to define the operation type when an entity changes.

ALTER = 'ALTER'

CREATE = 'CREATE'

CUSTOM = 'CUSTOM'

DELETE = 'DELETE'

DROP = 'DROP'

INSERT = 'INSERT'

UNKNOWN = 'UNKNOWN'

UPDATE = 'UPDATE'

OracleDDLClass

class datahub.metadata.schema_classes.OracleDDLClass(tableSchema)

Bases: DictWrapper

Schema holder for oracle data definition language that describes an oracle table.

  • Parameters:tableSchema (str)

property tableSchema : str

The native schema in the dataset’s platform. This is a human readable (json blob) table schema.

OrcSchemaClass

class datahub.metadata.schema_classes.OrcSchemaClass(schema)

Bases: DictWrapper

Schema text of an ORC schema.

  • Parameters:schema (str)

property schema : str

The native schema for ORC file format.

OriginClass

class datahub.metadata.schema_classes.OriginClass(type, externalType = None)

Bases: _Aspect

Carries information about where an entity originated from.

  • Parameters:
    • type (Union[str, OriginTypeClass])
    • externalType (Optional[str])

property externalType : None | str

Only populated if type is EXTERNAL. The externalType of the entity, such as the name of the identity provider.

property type : str | OriginTypeClass

Where an entity originated from. Either NATIVE or EXTERNAL.

OriginTypeClass

class datahub.metadata.schema_classes.OriginTypeClass()

Bases: object

Enum to define where an entity originated from.

EXTERNAL = 'EXTERNAL'

NATIVE = 'NATIVE'

OtherSchemaClass

class datahub.metadata.schema_classes.OtherSchemaClass(rawSchema)

Bases: DictWrapper

Schema holder for undefined schema types.

  • Parameters:rawSchema (str)

property rawSchema : str

The native schema in the dataset’s platform.

OwnerClass

class datahub.metadata.schema_classes.OwnerClass(owner, type, typeUrn = None, source = None)

Bases: DictWrapper

Ownership information

property owner : str

Owner URN, e.g. urn:li:corpuser:ldap, urn:li:corpGroup:group_name, and urn:li:multiProduct:mp_name (Caveat: only corpuser is currently supported in the frontend.)

property source : None | OwnershipSourceClass

Source information for the ownership

property type : str | OwnershipTypeClass

The type of the ownership

property typeUrn : None | str

The type of the ownership, given as the URN of an ownership type.

OwnershipClass

class datahub.metadata.schema_classes.OwnershipClass(owners, ownerTypes = None, lastModified = None)

Bases: _Aspect

Ownership information of an entity.

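  • Parameters:
    • owners (List[OwnerClass])
    • ownerTypes (Optional[Dict[str, List[str]]])
    • lastModified (Optional[AuditStampClass])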

property lastModified : AuditStampClass

Audit stamp containing who last modified the record and when. A value of 0 in the time field indicates missing data.

property ownerTypes : Dict[str, List[str]] | None

Ownership type to Owners map, populated via mutation hook.

property owners : List[OwnerClass]

List of owners of the entity.

OwnershipSourceClass

class datahub.metadata.schema_classes.OwnershipSourceClass(type, url = None)

Bases: DictWrapper

Source/provider of the ownership information

property type : str | OwnershipSourceTypeClass

The type of the source

property url : None | str

A reference URL for the source

OwnershipSourceTypeClass

class datahub.metadata.schema_classes.OwnershipSourceTypeClass()

Bases: object

AUDIT = 'AUDIT'

DATABASE = 'DATABASE'

FILE_SYSTEM = 'FILE_SYSTEM'

ISSUE_TRACKING_SYSTEM = 'ISSUE_TRACKING_SYSTEM'

MANUAL = 'MANUAL'

OTHER = 'OTHER'

SERVICE = 'SERVICE'

SOURCE_CONTROL = 'SOURCE_CONTROL'

OwnershipTypeClass

class datahub.metadata.schema_classes.OwnershipTypeClass()

Bases: object

Asset owner types

BUSINESS_OWNER = 'BUSINESS_OWNER'

CONSUMER = 'CONSUMER'

CUSTOM = 'CUSTOM'

DATAOWNER = 'DATAOWNER'

DATA_STEWARD = 'DATA_STEWARD'

DELEGATE = 'DELEGATE'

DEVELOPER = 'DEVELOPER'

NONE = 'NONE'

PRODUCER = 'PRODUCER'

STAKEHOLDER = 'STAKEHOLDER'

TECHNICAL_OWNER = 'TECHNICAL_OWNER'

OwnershipTypeInfoClass

class datahub.metadata.schema_classes.OwnershipTypeInfoClass(name, created, lastModified, description = None)

Bases: _Aspect

Information about an ownership type

property created : AuditStampClass

Audit stamp capturing the time and actor who created the Ownership Type.

property description : None | str

Description of the Ownership Type

property lastModified : AuditStampClass

Audit stamp capturing the time and actor who last modified the Ownership Type.

property name : str

Display name of the Ownership Type

OwnershipTypeKeyClass

class datahub.metadata.schema_classes.OwnershipTypeKeyClass(id)

Bases: _Aspect

Key for an Ownership Type

  • Parameters:id (str)

property id : str

Unique ID for the data ownership type name, i.e. Business Owner, Data Steward, Technical Owner, etc. Should be separate from the name used for displaying an Ownership Type.

ParametersClass

class datahub.metadata.schema_classes.ParametersClass()

Bases: DictWrapper

Arbitrary key-value parameters for an Entity Change Event. (any record).

PartitionSpecClass

class datahub.metadata.schema_classes.PartitionSpecClass(partition, timePartition = None, type = None)

Bases: DictWrapper

A reference to a specific partition in a dataset.

property partition : str

A unique id / value for the partition for which statistics were collected, generated by applying the key definition to a given row.

property timePartition : None | TimeWindowClass

Time window of the partition, if we are able to extract it from the partition key.

property type : str | PartitionTypeClass

Unused!

PartitionSummaryClass

class datahub.metadata.schema_classes.PartitionSummaryClass(partition, createdTime = None, lastModifiedTime = None)

Bases: DictWrapper

Defines how the data is partitioned

  • Parameters:
    • partition (str)
    • createdTime (Optional[int])
    • lastModifiedTime (Optional[int])

property createdTime : None | int

The created time for a given partition.

property lastModifiedTime : None | int

The last modified / touched time for a given partition.

property partition : str

A unique id / value for the partition for which statistics were collected, generated by applying the key definition to a given row.

PartitionTypeClass

class datahub.metadata.schema_classes.PartitionTypeClass()

Bases: object

FULL_TABLE = 'FULL_TABLE'

PARTITION = 'PARTITION'

QUERY = 'QUERY'

PartitionsSummaryClass

class datahub.metadata.schema_classes.PartitionsSummaryClass(minPartition = None, maxPartition = None)

Bases: _Aspect

Defines how the data is partitioned for Data Lake tables (e.g. Hive, S3, Iceberg, Delta, Hudi, etc).

property maxPartition : None | PartitionSummaryClass

The maximum partition as ordered

property minPartition : None | PartitionSummaryClass

The minimum partition as ordered

PlatformEventClass

class datahub.metadata.schema_classes.PlatformEventClass(header, name, payload)

Bases: DictWrapper

A DataHub Platform Event.

property header : PlatformEventHeaderClass

Header information stored with the event.

property name : str

The name of the event, e.g. the type of event. For example, ‘notificationRequestEvent’, ‘entityChangeEvent’

property payload : GenericPayloadClass

The event payload.

PlatformEventHeaderClass

class datahub.metadata.schema_classes.PlatformEventHeaderClass(timestampMillis)

Bases: DictWrapper

A header included with each DataHub platform event.

  • Parameters:timestampMillis (int)

property timestampMillis : int

The event timestamp field as epoch at UTC in milli seconds.

PlatformResourceInfoClass

class datahub.metadata.schema_classes.PlatformResourceInfoClass(resourceType, primaryKey, secondaryKeys = None, value = None)

Bases: _Aspect

Platform Resource Info. These entities are for miscellaneous data that is used in non-core parts of the system. For instance, if we want to persist & retrieve data from auxiliary integrations such as Slack or Microsoft Teams.

  • Parameters:
    • resourceType (str)
    • primaryKey (str)
    • secondaryKeys (Optional[List[str]])
    • value (Optional[SerializedValueClass]) –

property primaryKey : str

The primary key for this platform resource, e.g. for a Slack member this would be the memberID. Primary keys specified here don’t need to include any additional specificity for the dataPlatform. The @PlatformResourceKey is supposed to represent that.

property resourceType : str

The type of the resource. Intended as a loose specifier of the generic type of the resource. Producer is not forced to conform to a specific set of symbols for resource types. The @PlatformResourceType enumeration offers a paved path for agreed upon common terms, but is not required to be followed. Example values could be: conversation, user, grant, etc. Resource types are indexed for ease of access. e.g. Get me all platform resources of type user for the platform looker

property secondaryKeys : None | List[str]

The secondary keys this platform resource can be located by. I.e., for a slack member this would be email or phone.

property value : None | SerializedValueClass

The serialized value of this platform resource item.

PlatformResourceKeyClass

class datahub.metadata.schema_classes.PlatformResourceKeyClass(id)

Bases: _Aspect

Key for a Platform Resource. Platform Resources are assets that are not part of the core data model. They are stored in DataHub primarily to help with application-specific use-cases that are not sufficiently generalized to move into the core data model. For instance, if we want to persist & retrieve additional user profile data from auxiliary integrations such as Slack or Microsoft Teams for resolving details later.

  • Parameters:id (str)

property id : str

A unique id for this entity. There are no constraints on the format of this id, but most implementations will choose to use a UUID. This id should be globally unique for the entire DataHub instance and uniquely identify the resource that is being stored, so most implementations will combine logical attributes like platform name, platform instance, platform-specific-id and the resource type to create the unique id. e.g. slack:slack-instance:slack-user-id:user-info or guid(slack, slack-instance, slack-user-id, user-info) etc.

PlatformTypeClass

class datahub.metadata.schema_classes.PlatformTypeClass()

Bases: object

Platform types available at LinkedIn

FILE_SYSTEM = 'FILE_SYSTEM'

KEY_VALUE_STORE = 'KEY_VALUE_STORE'

MESSAGE_BROKER = 'MESSAGE_BROKER'

OBJECT_STORE = 'OBJECT_STORE'

OLAP_DATASTORE = 'OLAP_DATASTORE'

OTHERS = 'OTHERS'

QUERY_ENGINE = 'QUERY_ENGINE'

RELATIONAL_DB = 'RELATIONAL_DB'

SEARCH_ENGINE = 'SEARCH_ENGINE'

PolicyMatchConditionClass

class datahub.metadata.schema_classes.PolicyMatchConditionClass()

Bases: object

The matching condition in a filter criterion

EQUALS = 'EQUALS'

NOT_EQUALS = 'NOT_EQUALS'

STARTS_WITH = 'STARTS_WITH'

PolicyMatchCriterionClass

class datahub.metadata.schema_classes.PolicyMatchCriterionClass(field, values, condition = None)

Bases: DictWrapper

A criterion for matching a field with given value

property condition : str | PolicyMatchConditionClass

The condition for the criterion

property field : str

The name of the field that the criterion refers to

property values : List[str]

Values. Matches criterion if any one of the values matches condition (OR-relationship)

PolicyMatchFilterClass

class datahub.metadata.schema_classes.PolicyMatchFilterClass(criteria)

Bases: DictWrapper

The filter for specifying the resource or actor to apply privileges to

property criteria : List[PolicyMatchCriterionClass]

A list of criteria to apply conjunctively (so all criteria must pass)

PostContentClass

class datahub.metadata.schema_classes.PostContentClass(title, type, description = None, link = None, media = None)

Bases: DictWrapper

Content stored inside a Post.

property description : None | str

Optional description of the post.

property link : None | str

Optional link that the post is associated with.

property media : None | MediaClass

Optional media that the post is storing

property title : str

Title of the post.

property type : str | PostContentTypeClass

Type of content held in the post.

PostContentTypeClass

class datahub.metadata.schema_classes.PostContentTypeClass()

Bases: object

Enum defining the type of content held in a Post.

TEXT = 'TEXT'

PostInfoClass

class datahub.metadata.schema_classes.PostInfoClass(type, content, created, lastModified, auditStamp = None, target = None)

Bases: _Aspect

Information about a DataHub Post.

property auditStamp : None | AuditStampClass

The audit stamp at which the request was last updated

property content : PostContentClass

Content stored in the post.

property created : int

The time at which the post was initially created

property lastModified : int

The time at which the post was last modified

property target : None | str

Optional Entity URN that the post is associated with.

property type : str | PostTypeClass

Type of the Post.

PostKeyClass

class datahub.metadata.schema_classes.PostKeyClass(id)

Bases: _Aspect

Key for a Post.

  • Parameters:id (str)

property id : str

A unique id for the DataHub Post record. Generated on the server side at Post creation time.

PostTypeClass

class datahub.metadata.schema_classes.PostTypeClass()

Bases: object

Enum defining types of Posts.

ENTITY_ANNOUNCEMENT = 'ENTITY_ANNOUNCEMENT'

HOME_PAGE_ANNOUNCEMENT = 'HOME_PAGE_ANNOUNCEMENT'

PrestoDDLClass

class datahub.metadata.schema_classes.PrestoDDLClass(rawSchema)

Bases: DictWrapper

Schema holder for presto data definition language that describes a presto view.

  • Parameters:rawSchema (str)

property rawSchema : str

The raw schema in the dataset’s platform. This includes the DDL and the columns extracted from DDL.

PropertyCardinalityClass

class datahub.metadata.schema_classes.PropertyCardinalityClass()

Bases: object

MULTIPLE = 'MULTIPLE'

SINGLE = 'SINGLE'

PropertyValueClass

class datahub.metadata.schema_classes.PropertyValueClass(value, description = None)

Bases: DictWrapper

  • Parameters:
    • value (Union[str, float])
    • description (Optional[str])

property description : None | str

Optional description of the property value

property value : str | float

QuantileClass

class datahub.metadata.schema_classes.QuantileClass(quantile, value)

Bases: DictWrapper

  • Parameters:
    • quantile (str)
    • value (str)

property quantile : str

property value : str

QuantitativeAnalysesClass

class datahub.metadata.schema_classes.QuantitativeAnalysesClass(unitaryResults = None, intersectionalResults = None)

Bases: _Aspect

Quantitative analyses should be disaggregated, that is, broken down by the chosen factors. Quantitative analyses should provide the results of evaluating the MLModel according to the chosen metrics, providing confidence interval values when possible.

  • Parameters:
    • unitaryResults (Optional[str])
    • intersectionalResults (Optional[str])

property intersectionalResults : None | str

Link to a dashboard with results showing how the MLModel performed with respect to the intersection of evaluated factors.

property unitaryResults : None | str

Link to a dashboard with results showing how the MLModel performed with respect to each factor

QueryCellClass

class datahub.metadata.schema_classes.QueryCellClass(cellId, changeAuditStamps, rawQuery, cellTitle = None, lastExecuted = None)

Bases: DictWrapper

Query cell in a Notebook, which will present content in query format

property cellId : str

Unique id for the cell. This id should be globally unique for a Notebook tool even when there are multiple deployments of it. As an example, Notebook URL could be used here for QueryBook such as ‘querybook.com/notebook/773/?cellId=1234’

property cellTitle : None | str

Title of the cell

property changeAuditStamps : ChangeAuditStampsClass

Captures information about who created/last modified/deleted this Notebook cell and when

property lastExecuted : None | AuditStampClass

Captures information about who last executed this query cell and when

property rawQuery : str

Raw query to explain some specific logic in a Notebook

QueryKeyClass

class datahub.metadata.schema_classes.QueryKeyClass(id)

Bases: _Aspect

Key for a Query

  • Parameters:id (str)

property id : str

A unique id for the Query.

QueryLanguageClass

class datahub.metadata.schema_classes.QueryLanguageClass()

Bases: object

SQL = 'SQL'

UNKNOWN = 'UNKNOWN'

QueryPropertiesClass

class datahub.metadata.schema_classes.QueryPropertiesClass(statement, source, created, lastModified, customProperties = None, name = None, description = None, origin = None)

Bases: _Aspect

Information about a Query against one or more data assets (e.g. Tables or Views).

property created : AuditStampClass

Audit stamp capturing the time and actor who created the Query.

property customProperties : Dict[str, str]

Custom property bag.

property description : None | str

The Query description.

property lastModified : AuditStampClass

Audit stamp capturing the time and actor who last modified the Query.

property name : None | str

Optional display name to identify the query.

property origin : None | str

The origin of the Query. This is the source of the Query (e.g. a View, Stored Procedure, dbt Model, etc.) that the Query was created from.

property source : str | QuerySourceClass

The source of the Query

property statement : QueryStatementClass

The Query Statement.

QuerySourceClass

class datahub.metadata.schema_classes.QuerySourceClass()

Bases: object

MANUAL = 'MANUAL'

SYSTEM = 'SYSTEM'

QueryStatementClass

class datahub.metadata.schema_classes.QueryStatementClass(value, language = None)

Bases: DictWrapper

A query statement against one or more data assets.

property language : str | QueryLanguageClass

The language of the Query, e.g. SQL.

property value : str

The query text

QuerySubjectClass

class datahub.metadata.schema_classes.QuerySubjectClass(entity)

Bases: DictWrapper

A single subject of a particular query. In the future, we may evolve this model to include richer details about the Query Subject in relation to the query.

  • Parameters:entity (str)

property entity : str

An entity which is the subject of a query.

QuerySubjectsClass

class datahub.metadata.schema_classes.QuerySubjectsClass(subjects)

Bases: _Aspect

Information about the subjects of a particular Query, i.e. the assets being queried.

property subjects : List[QuerySubjectClass]

One or more subjects of the query.

In single-asset queries (e.g. table select), this will contain the Table reference and optionally schema field references.

In multi-asset queries (e.g. table joins), this may contain multiple Table references and optionally schema field references.

QueryUsageStatisticsClass

class datahub.metadata.schema_classes.QueryUsageStatisticsClass(timestampMillis, eventGranularity = None, partitionSpec = None, messageId = None, queryCount = None, queryCost = None, lastExecutedAt = None, uniqueUserCount = None, userCounts = None)

Bases: _Aspect

Stats corresponding to a query’s usage.

  • Parameters:
    • timestampMillis (int)
    • eventGranularity (Optional[TimeWindowSizeClass]) –
    • partitionSpec (Optional[PartitionSpecClass]) –
    • messageId (Optional[str])
    • queryCount (Optional[int])
    • queryCost (Optional[float])
    • lastExecutedAt (Optional[int])
    • uniqueUserCount (Optional[int])
    • userCounts (Optional[List[DatasetUserUsageCountsClass]]) –

ASPECT_TYPE : ClassVar[str] = 'timeseries'

property eventGranularity : None | TimeWindowSizeClass

Granularity of the event if applicable

property lastExecutedAt : None | int

Last executed timestamp

property messageId : None | str

The optional messageId, if provided serves as a custom user-defined unique identifier for an aspect value.

property partitionSpec : PartitionSpecClass | None

The optional partition specification.

property queryCost : None | float

Query cost for this query and bucket

property queryCount : None | int

Total query count in this bucket

property timestampMillis : int

The event timestamp field as epoch at UTC in milli seconds.

property uniqueUserCount : None | int

Unique user count

property userCounts : None | List[DatasetUserUsageCountsClass]

Users within this bucket, with frequency counts

RecordTypeClass

class datahub.metadata.schema_classes.RecordTypeClass()

Bases: DictWrapper

Record field type.

RelationshipFieldMappingClass

class datahub.metadata.schema_classes.RelationshipFieldMappingClass(sourceField, destinationField)

Bases: DictWrapper

Individual Field Mapping of a relationship - one of several

  • Parameters:
    • sourceField (str)
    • destinationField (str)

property destinationField : str

All fields from dataset B that are required for the join, maps to aFields 1:1

property sourceField : str

All fields from dataset A that are required for the join, maps to bFields 1:1

RetentionClass

class datahub.metadata.schema_classes.RetentionClass(version = None, time = None)

Bases: DictWrapper

Base class that encapsulates different retention policies. Only one of the fields should be set

property time : None | TimeBasedRetentionClass

property version : None | VersionBasedRetentionClass

RoleAssociationClass

class datahub.metadata.schema_classes.RoleAssociationClass(urn)

Bases: DictWrapper

Properties of an applied Role. For now, just an Urn

  • Parameters:urn (str)

property urn : str

Urn of the External Role

RoleKeyClass

class datahub.metadata.schema_classes.RoleKeyClass(id)

Bases: _Aspect

Key for an External AccessManagement

  • Parameters:id (str)

property id : str

A unique id for the access management IAM.

RoleMembershipClass

class datahub.metadata.schema_classes.RoleMembershipClass(roles)

Bases: _Aspect

Carries information about which roles a user or group is assigned to.

  • Parameters:roles (List[str])

property roles : List[str]

RolePropertiesClass

class datahub.metadata.schema_classes.RolePropertiesClass(name, type, description = None, requestUrl = None, created = None)

Bases: _Aspect

Information about an ExternalRoleProperties

  • Parameters:
    • name (str)
    • type (str)
    • description (Optional[str])
    • requestUrl (Optional[str])
    • created (Optional[AuditStampClass]) –

property created : None | AuditStampClass

Created Audit stamp

property description : None | str

Description of the IAM Role

property name : str

Display name of the IAM Role in the external system

property requestUrl : None | str

Link to access external access management

property type : str

Can be READ, ADMIN, WRITE

RoleUserClass

class datahub.metadata.schema_classes.RoleUserClass(user)

Bases: DictWrapper

Provisioned users of a role

  • Parameters:user (str)

property user : str

Link provisioned corp user for a role

RowCountChangeClass

class datahub.metadata.schema_classes.RowCountChangeClass(type, operator, parameters)

Bases: DictWrapper

Attributes defining a ROW_COUNT_CHANGE volume assertion.

property operator : str | AssertionStdOperatorClass

The operator you’d like to apply.

Note that only numeric operators are valid inputs: GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, BETWEEN.

property parameters : AssertionStdParametersClass

The parameters you’d like to provide as input to the operator.

Note that only numeric parameter types are valid inputs: NUMBER.

property type : str | AssertionValueChangeTypeClass

The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage.

RowCountTotalClass

class datahub.metadata.schema_classes.RowCountTotalClass(operator, parameters)

Bases: DictWrapper

Attributes defining a ROW_COUNT_TOTAL volume assertion.

property operator : str | AssertionStdOperatorClass

The operator you’d like to apply.

Note that only numeric operators are valid inputs: GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, BETWEEN.

property parameters : AssertionStdParametersClass

The parameters you’d like to provide as input to the operator.

Note that only numeric parameter types are valid inputs: NUMBER.

RunResultTypeClass

class datahub.metadata.schema_classes.RunResultTypeClass()

Bases: object

FAILURE = 'FAILURE'

SKIPPED = 'SKIPPED'

SUCCESS = 'SUCCESS'

UP_FOR_RETRY = 'UP_FOR_RETRY'

SchemaAssertionCompatibilityClass

class datahub.metadata.schema_classes.SchemaAssertionCompatibilityClass()

Bases: object

EXACT_MATCH = 'EXACT_MATCH'

SUBSET = 'SUBSET'

SUPERSET = 'SUPERSET'

SchemaAssertionInfoClass

class datahub.metadata.schema_classes.SchemaAssertionInfoClass(entity, schema, compatibility = None)

Bases: DictWrapper

Attributes that are applicable to schema assertions

property compatibility : str | SchemaAssertionCompatibilityClass | None

The required compatibility level for the schema assertion to pass.

property entity : str

The entity targeted by the assertion

property schema : SchemaMetadataClass

A definition of the expected structure for the asset

Note that many of the fields of this model, especially those related to metadata (tags, terms) will go unused in this context.

SchemaContractClass

class datahub.metadata.schema_classes.SchemaContractClass(assertion)

Bases: DictWrapper

Expectations for a logical schema

  • Parameters:assertion (str)

property assertion : str

The assertion representing the schema contract.

SchemaFieldAliasesClass

class datahub.metadata.schema_classes.SchemaFieldAliasesClass(aliases = None)

Bases: _Aspect

  • Parameters:aliases (Optional[List[str]])

property aliases : None | List[str]

Used to store aliases

SchemaFieldClass

class datahub.metadata.schema_classes.SchemaFieldClass(fieldPath, type, nativeDataType, jsonPath = None, nullable = None, description = None, label = None, created = None, lastModified = None, recursive = None, globalTags = None, glossaryTerms = None, isPartOfKey = None, isPartitioningKey = None, jsonProps = None)

Bases: DictWrapper

SchemaField to describe metadata related to dataset schema.

  • Parameters:
    • fieldPath (str)
    • type (SchemaFieldDataTypeClass) –
    • nativeDataType (str)
    • jsonPath (Optional[str])
    • nullable (Optional[bool])
    • description (Optional[str])
    • label (Optional[str])
    • created (Optional[AuditStampClass]) –
    • lastModified (Optional[AuditStampClass]) –
    • recursive (Optional[bool])
    • globalTags (Optional[GlobalTagsClass]) –
    • glossaryTerms (Optional[GlossaryTermsClass]) –
    • isPartOfKey (Optional[bool])
    • isPartitioningKey (Optional[bool])
    • jsonProps (Optional[str])

property created : None | AuditStampClass

An AuditStamp corresponding to the creation of this schema field.

property description : None | str

Description

property fieldPath : str

Flattened name of the field. Field is computed from jsonPath field.

property globalTags : None | GlobalTagsClass

Tags associated with the field

property glossaryTerms : None | GlossaryTermsClass

Glossary terms associated with the field

property isPartOfKey : bool

For schema fields that are part of complex keys, set this field to true. We do this to easily distinguish between value and key fields.

property isPartitioningKey : None | bool

For Datasets which are partitioned, this determines the partitioning key. Note that multiple columns can be part of a partitioning key, but currently we do not support rendering the ordered partitioning key.

property jsonPath : None | str

Flattened name of a field in JSON Path notation.

property jsonProps : None | str

For schema fields that have other properties that are not modeled explicitly, use this field to serialize those properties into a JSON string

property label : None | str

Label of the field. Provides a more human-readable name for the field than field path. Some sources will provide this metadata but not all sources have the concept of a label. If just one string is associated with a field in a source, that is most likely a description.

Note that this field is deprecated and is not surfaced in the UI.

property lastModified : None | AuditStampClass

An AuditStamp corresponding to the last modification of this schema field.

property nativeDataType : str

The native type of the field in the dataset’s platform as declared by platform schema.

property nullable : bool

Indicates if this field is optional or nullable

property recursive : bool

There are use cases when a field in type B references type A. A field in A references field of type B. In such cases, we will mark the first field as recursive.

property type : SchemaFieldDataTypeClass

Platform independent field type of the field.

SchemaFieldDataTypeClass

SchemaFieldInfoClass

class datahub.metadata.schema_classes.SchemaFieldInfoClass(name = None, schemaFieldAliases = None)

Bases: _Aspect

  • Parameters:
    • name (Optional[str])
    • schemaFieldAliases (Optional[List[str]])

property name : None | str

property schemaFieldAliases : None | List[str]

Used to store field path variations for the schemaField urn.

SchemaFieldKeyClass

class datahub.metadata.schema_classes.SchemaFieldKeyClass(parent, fieldPath)

Bases: _Aspect

Key for a SchemaField

  • Parameters:
    • parent (str)
    • fieldPath (str)

property fieldPath : str

fieldPath identifying the schema field

property parent : str

Parent associated with the schema field

SchemaFieldSnapshotClass

class datahub.metadata.schema_classes.SchemaFieldSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for a specific schema field entity.

property aspects : List[SchemaFieldKeyClass]

The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

SchemaFieldSpecClass

class datahub.metadata.schema_classes.SchemaFieldSpecClass(path, type, nativeType)

Bases: DictWrapper

Lightweight spec used for referencing a particular schema field.

  • Parameters:
    • path (str)
    • type (str)
    • nativeType (str)

property nativeType : str

The native field type

property path : str

The field path

property type : str

The DataHub standard schema field type.

SchemaMetadataClass

class datahub.metadata.schema_classes.SchemaMetadataClass(schemaName, platform, version, hash, platformSchema, fields, created = None, lastModified = None, deleted = None, dataset = None, cluster = None, primaryKeys = None, foreignKeysSpecs = None, foreignKeys = None)

Bases: _Aspect

SchemaMetadata to describe metadata related to store schema

property cluster : None | str

The cluster this schema metadata resides from

property created : AuditStampClass

An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.

property dataset : None | str

Dataset this schema metadata is associated with.

property deleted : None | AuditStampClass

An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.

property fields : List[SchemaFieldClass]

Client provided a list of fields from document schema.

property foreignKeys : None | List[ForeignKeyConstraintClass]

List of foreign key constraints for the schema

property foreignKeysSpecs : None | Dict[str, ForeignKeySpecClass]

Map captures all the references schema makes to external datasets. Map key is ForeignKeySpecName typeref.

property hash : str

the SHA1 hash of the schema content

property lastModified : AuditStampClass

An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.

property platform : str

Standardized platform urn where schema is defined. The data platform Urn (urn:li:platform:{platform_name})

property platformSchema : EspressoSchemaClass | OracleDDLClass | MySqlDDLClass | PrestoDDLClass | KafkaSchemaClass | BinaryJsonSchemaClass | OrcSchemaClass | SchemalessClass | KeyValueSchemaClass | OtherSchemaClass

The native schema in the dataset’s platform.

property primaryKeys : None | List[str]

Client provided list of fields that define primary keys to access record. Field order defines hierarchical espresso keys. An empty list indicates the absence of a primary key access pattern. Value is a SchemaField@fieldPath.

property schemaName : str

Schema name e.g. PageViewEvent, identity.Profile, ams.account_management_tracking

property version : int

Every change to SchemaMetadata in the resource results in a new version. Version is server assigned. This version is different from the platform native schema version.

SchemalessClass

class datahub.metadata.schema_classes.SchemalessClass()

Bases: DictWrapper

The dataset has no specific schema associated with it

SearchFieldTypeClass

class datahub.metadata.schema_classes.SearchFieldTypeClass()

Bases: object

BOOLEAN = 'BOOLEAN'

BROWSE_PATH = 'BROWSE_PATH'

BROWSE_PATH_V2 = 'BROWSE_PATH_V2'

COUNT = 'COUNT'

DATETIME = 'DATETIME'

KEYWORD = 'KEYWORD'

OBJECT = 'OBJECT'

TEXT = 'TEXT'

TEXT_PARTIAL = 'TEXT_PARTIAL'

URN = 'URN'

URN_PARTIAL = 'URN_PARTIAL'

WORD_GRAM = 'WORD_GRAM'

SerializedValueClass

class datahub.metadata.schema_classes.SerializedValueClass(blob, contentType = None, schemaType = None, schemaRef = None)

Bases: DictWrapper

Captures the serialized value of a (usually) schema-d blob.

property blob : bytes

The serialized blob value.

property contentType : str | SerializedValueContentTypeClass

The content-type of the serialized blob value.

property schemaRef : None | str

An optional reference to the schema that models the object. e.g., ‘com.linkedin.pegasus2avro.platformresource.slack.SlackConversation’

property schemaType : None | str | SerializedValueSchemaTypeClass

The schema type for the schema that models the object that was serialized into the blob.

Absence of this field indicates that the schema is not known. If the schema is known, the value should be set to the appropriate schema type. Use the NONE value if the existing schema categories do not apply.

SerializedValueContentTypeClass

class datahub.metadata.schema_classes.SerializedValueContentTypeClass()

Bases: object

BINARY = 'BINARY'

JSON = 'JSON'

SerializedValueSchemaTypeClass

class datahub.metadata.schema_classes.SerializedValueSchemaTypeClass()

Bases: object

AVRO = 'AVRO'

JSON = 'JSON'

NONE = 'NONE'

PEGASUS = 'PEGASUS'

PROTOBUF = 'PROTOBUF'

THRIFT = 'THRIFT'

SiblingsClass

class datahub.metadata.schema_classes.SiblingsClass(siblings, primary)

Bases: _Aspect

Siblings information of an entity.

  • Parameters:
    • siblings (List[str])
    • primary (bool)

property primary : bool

If this is the leader entity of the set of siblings

property siblings : List[str]

List of sibling entities

SlackNotificationSettingsClass

class datahub.metadata.schema_classes.SlackNotificationSettingsClass(userHandle = None, channels = None)

Bases: DictWrapper

Slack Notification settings for an actor.

  • Parameters:
    • userHandle (Optional[str])
    • channels (Optional[List[str]])

property channels : None | List[str]

Optional list of channels to send notifications to

property userHandle : None | str

Optional user handle

SlackUserInfoClass

class datahub.metadata.schema_classes.SlackUserInfoClass(slackInstance, id, name, realName, displayName, teamId, isDeleted, isAdmin, isOwner, isPrimaryOwner, isBot, email = None, timezone = None, timezoneOffset = None, title = None, phone = None, profilePictureUrl = None, statusText = None, statusEmoji = None, lastUpdatedSeconds = None)

Bases: _Aspect

Information about a Slack user.

  • Parameters:
    • slackInstance (str)
    • id (str)
    • name (str)
    • realName (str)
    • displayName (str)
    • teamId (str)
    • isDeleted (bool)
    • isAdmin (bool)
    • isOwner (bool)
    • isPrimaryOwner (bool)
    • isBot (bool)
    • email (Optional[str])
    • timezone (Optional[str])
    • timezoneOffset (Optional[int])
    • title (Optional[str])
    • phone (Optional[str])
    • profilePictureUrl (Optional[str])
    • statusText (Optional[str])
    • statusEmoji (Optional[str])
    • lastUpdatedSeconds (Optional[int])

property displayName : str

The display name of the Slack member.

property email : None | str

The email associated with the Slack member.

property id : str

The unique identifier for the Slack member.

property isAdmin : bool

Whether the member is an admin.

property isBot : bool

Whether the member is a bot.

property isDeleted : bool

Whether the member is deleted or not.

property isOwner : bool

Whether the member is an owner.

property isPrimaryOwner : bool

Whether the member is a primary owner.

property lastUpdatedSeconds : None | int

The timestamp of when the member was last updated. (in seconds)

property name : str

The username of the Slack member.

property phone : None | str

The phone number of the Slack member.

property profilePictureUrl : None | str

The URL of the member’s profile picture.

property realName : str

The real name of the Slack member.

property slackInstance : str

The dataplatform instance that this Slack member belongs to.

property statusEmoji : None | str

The status emoji of the Slack member.

property statusText : None | str

The status text of the Slack member.

property teamId : str

The ID associated with the Slack team.

property timezone : None | str

The timezone of the Slack member.

property timezoneOffset : None | int

The timezone offset of the Slack member.

property title : None | str

The title of the Slack member.

SourceCodeClass

class datahub.metadata.schema_classes.SourceCodeClass(sourceCode)

Bases: _Aspect

Source Code

property sourceCode : List[SourceCodeUrlClass]

Source Code along with types

SourceCodeUrlClass

class datahub.metadata.schema_classes.SourceCodeUrlClass(type, sourceCodeUrl)

Bases: DictWrapper

Source Code Url Entity

property sourceCodeUrl : str

Source Code Url

property type : str | SourceCodeUrlTypeClass

Source Code Url Types

SourceCodeUrlTypeClass

class datahub.metadata.schema_classes.SourceCodeUrlTypeClass()

Bases: object

EVALUATION_PIPELINE_SOURCE_CODE = 'EVALUATION_PIPELINE_SOURCE_CODE'

ML_MODEL_SOURCE_CODE = 'ML_MODEL_SOURCE_CODE'

TRAINING_PIPELINE_SOURCE_CODE = 'TRAINING_PIPELINE_SOURCE_CODE'

SqlAssertionInfoClass

class datahub.metadata.schema_classes.SqlAssertionInfoClass(type, entity, statement, operator, parameters, changeType = None)

Bases: DictWrapper

Attributes defining a SQL Assertion

property changeType : None | str | AssertionValueChangeTypeClass

The type of the value used to evaluate the assertion: a fixed absolute value or a relative percentage. This value is required if the type is METRIC_CHANGE.

property entity : str

The entity targeted by this SQL check.

property operator : str | AssertionStdOperatorClass

The operator you’d like to apply to the result of the SQL query.

Note that at this time, only numeric operators are valid inputs: GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, BETWEEN.

property parameters : AssertionStdParametersClass

The parameters you’d like to provide as input to the operator.

Note that only numeric parameter types are valid inputs: NUMBER.

property statement : str

The SQL statement to be executed when evaluating the assertion (or computing the metric). This should be a valid and complete statement, executable by itself.

Usually this should be a SELECT query statement.

property type : str | SqlAssertionTypeClass

The type of the SQL assertion being monitored.

SqlAssertionTypeClass

class datahub.metadata.schema_classes.SqlAssertionTypeClass()

Bases: object

METRIC = 'METRIC'

METRIC_CHANGE = 'METRIC_CHANGE'

SsoSettingsClass

class datahub.metadata.schema_classes.SsoSettingsClass(baseUrl, oidcSettings = None)

Bases: DictWrapper

SSO Integrations, supported on the UI.

property baseUrl : str

Auth base URL.

property oidcSettings : None | OidcSettingsClass

Optional OIDC SSO settings.

StatusClass

class datahub.metadata.schema_classes.StatusClass(removed = None)

Bases: _Aspect

The lifecycle status metadata of an entity, e.g. dataset, metric, feature, etc. This aspect is used to represent soft deletes conventionally.

  • Parameters:removed (Optional[bool])

property removed : bool

Whether the entity has been removed (soft-deleted).

StringTypeClass

class datahub.metadata.schema_classes.StringTypeClass()

Bases: DictWrapper

String field type.

StructuredExecutionReportClass

class datahub.metadata.schema_classes.StructuredExecutionReportClass(type, serializedValue, contentType)

Bases: DictWrapper

A flexible carrier for structured results of an execution request. The goal is to allow for free flow of structured responses from execution tasks to the orchestrator or observer. The full spectrum of different execution report types is not intended to be modeled by this object.

  • Parameters:
    • type (str)
    • serializedValue (str)
    • contentType (str)

property contentType : str

The content-type of the serialized value (e.g. application/json, application/json;gzip etc.)

property serializedValue : str

The serialized value of the structured report

property type : str

The type of the structured report. (e.g. INGESTION_REPORT, TEST_CONNECTION_REPORT, etc.)

StructuredPropertiesClass

class datahub.metadata.schema_classes.StructuredPropertiesClass(properties)

Bases: _Aspect

Properties about an entity governed by StructuredPropertyDefinition

property properties : List[StructuredPropertyValueAssignmentClass]

Custom property bag.

StructuredPropertyDefinitionClass

class datahub.metadata.schema_classes.StructuredPropertyDefinitionClass(qualifiedName, valueType, entityTypes, displayName = None, typeQualifier = None, allowedValues = None, cardinality = None, description = None, searchConfiguration = None, immutable = None, version = None, created = None, lastModified = None)

Bases: _Aspect

property allowedValues : None | List[PropertyValueClass]

A list of allowed values that the property is allowed to take. If this is not specified, then the property can take any value of given type.

property cardinality : str | PropertyCardinalityClass | None

The cardinality of the property. If not specified, then the property is assumed to be single valued.

property created : None | AuditStampClass

Created Audit stamp

property description : None | str

The description of the property. This is the description that will be shown in the UI.

property displayName : None | str

The display name of the property. This is the name that will be shown in the UI and can be used to look up the property id.

property entityTypes : List[str]

property immutable : bool

Whether the structured property value is immutable once applied to an entity.

property lastModified : None | AuditStampClass

Last Modified Audit stamp

property qualifiedName : str

The fully qualified name of the property. e.g. io.acryl.datahub.myProperty

property searchConfiguration : None | DataHubSearchConfigClass

Search configuration for this property. If not specified, then the property is indexed using the default mapping from the logical type.

property typeQualifier : None | Dict[str, List[str]]

A map that allows for type specialization of the valueType. e.g. a valueType of urn:li:dataType:datahub.urn can be specialized to be a USER or GROUP URN by adding a typeQualifier like { "allowedTypes": ["urn:li:entityType:datahub.corpuser", "urn:li:entityType:datahub.corpGroup"] }

property valueType : str

The value type of the property. Must be a dataType. e.g. To indicate that the property is of type DATE, use urn:li:dataType:datahub.date

property version : None | str

Definition version - Allows breaking schema changes. String is compared case-insensitive and new versions must be monotonically increasing. Cannot use periods/dots. Suggestions: v1, v2 or 20240610, 20240611

StructuredPropertyKeyClass

class datahub.metadata.schema_classes.StructuredPropertyKeyClass(id)

Bases: _Aspect

  • Parameters:id (str)

property id : str

The id for a structured property.

StructuredPropertyParamsClass

class datahub.metadata.schema_classes.StructuredPropertyParamsClass(urn)

Bases: DictWrapper

  • Parameters:urn (str)

property urn : str

The structured property that is required on this entity

StructuredPropertySettingsClass

class datahub.metadata.schema_classes.StructuredPropertySettingsClass(isHidden = None, showInSearchFilters = None, showInAssetSummary = None, showAsAssetBadge = None, showInColumnsTable = None, lastModified = None)

Bases: _Aspect

Settings specific to a structured property entity

  • Parameters:
    • isHidden (Optional[bool])
    • showInSearchFilters (Optional[bool])
    • showInAssetSummary (Optional[bool])
    • showAsAssetBadge (Optional[bool])
    • showInColumnsTable (Optional[bool])
    • lastModified (Optional[AuditStampClass]) –

property isHidden : bool

Whether or not this asset should be hidden in the main application

property lastModified : None | AuditStampClass

Last Modified Audit stamp

property showAsAssetBadge : bool

Whether or not this asset should be displayed as an asset badge on other asset’s headers

property showInAssetSummary : bool

Whether or not this asset should be displayed in the asset sidebar

property showInColumnsTable : bool

Whether or not this asset should be displayed as a column in the schema field table in a Dataset’s “Columns” tab.

property showInSearchFilters : bool

Whether or not this asset should be displayed as a search filter

StructuredPropertyValueAssignmentClass

class datahub.metadata.schema_classes.StructuredPropertyValueAssignmentClass(propertyUrn, values, created = None, lastModified = None)

Bases: DictWrapper

property created : None | AuditStampClass

Audit stamp containing who created this relationship edge and when

property lastModified : None | AuditStampClass

Audit stamp containing who last modified this relationship edge and when

property propertyUrn : str

The property that is being assigned a value.

property values : List[str | float]

The value assigned to the property.

SubTypesClass

class datahub.metadata.schema_classes.SubTypesClass(typeNames)

Bases: _Aspect

Sub Types. Use this aspect to specialize a generic Entity e.g. Making a Dataset also be a View or also be a LookerExplore

  • Parameters:typeNames (List[str])

property typeNames : List[str]

The names of the specific types.

SystemMetadataClass

class datahub.metadata.schema_classes.SystemMetadataClass(lastObserved = None, runId = None, lastRunId = None, pipelineName = None, registryName = None, registryVersion = None, properties = None, version = None)

Bases: _Aspect

Metadata associated with each metadata change that is processed by the system

  • Parameters:
    • lastObserved (Optional[int])
    • runId (Optional[str])
    • lastRunId (Optional[str])
    • pipelineName (Optional[str])
    • registryName (Optional[str])
    • registryVersion (Optional[str])
    • properties (Optional[Dict[str, str]])
    • version (Optional[str])

property lastObserved : int | None

The timestamp the metadata was observed at

property lastRunId : str | None

The last run id that produced the metadata. Populated in case of batch-ingestion.

property pipelineName : None | str

The ingestion pipeline id that produced the metadata. Populated in case of batch ingestion.

property properties : None | Dict[str, str]

Additional properties

property registryName : None | str

The model registry name that was used to process this event

property registryVersion : None | str

The model registry version that was used to process this event

property runId : str | None

The original run id that produced the metadata. Populated in case of batch-ingestion.

property version : None | str

Aspect version. The initial implementation will use the aspect version’s number, however it is stored as a string in case a different aspect versioning scheme is later adopted.

TagAssociationClass

class datahub.metadata.schema_classes.TagAssociationClass(tag, context = None, attribution = None)

Bases: DictWrapper

Properties of an applied tag. For now, just an Urn. In the future we can extend this with other properties, e.g. propagation parameters.

property attribution : None | MetadataAttributionClass

Information about who, why, and how this metadata was applied

property context : None | str

Additional context about the association

property tag : str

Urn of the applied tag

TagKeyClass

class datahub.metadata.schema_classes.TagKeyClass(name)

Bases: _Aspect

Key for a Tag

  • Parameters:name (str)

property name : str

The tag name, which serves as a unique id

TagPropertiesClass

class datahub.metadata.schema_classes.TagPropertiesClass(name, description = None, colorHex = None)

Bases: _Aspect

Properties associated with a Tag

  • Parameters:
    • name (str)
    • description (Optional[str])
    • colorHex (Optional[str])

property colorHex : None | str

The color associated with the Tag in Hex. For example #FFFFFF.

property description : None | str

Documentation of the tag

property name : str

Display name of the tag

TagSnapshotClass

class datahub.metadata.schema_classes.TagSnapshotClass(urn, aspects)

Bases: DictWrapper

A metadata snapshot for a specific tag entity.

property aspects : List[TagKeyClass | OwnershipClass | TagPropertiesClass | StatusClass]

The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects.

property urn : str

URN for the entity the metadata snapshot is associated with.

TelemetryClientIdClass

class datahub.metadata.schema_classes.TelemetryClientIdClass(clientId)

Bases: _Aspect

A simple wrapper around a String to persist the client ID for telemetry in DataHub’s backend DB

  • Parameters:clientId (str)

property clientId : str

A string representing the telemetry client ID

TelemetryKeyClass

class datahub.metadata.schema_classes.TelemetryKeyClass(name)

Bases: _Aspect

Key for the telemetry client ID, only one should ever exist

  • Parameters:name (str)

property name : str

The telemetry entity name, which serves as a unique id

TestDefinitionClass

class datahub.metadata.schema_classes.TestDefinitionClass(type, json = None)

Bases: DictWrapper

property json : None | str

JSON format configuration for the test

property type : str | TestDefinitionTypeClass

The Test Definition Type

TestDefinitionTypeClass

class datahub.metadata.schema_classes.TestDefinitionTypeClass()

Bases: object

JSON = 'JSON'

TestInfoClass

class datahub.metadata.schema_classes.TestInfoClass(name, category, definition, description = None)

Bases: _Aspect

Information about a DataHub Test

  • Parameters:
    • name (str)
    • category (str)
    • definition (TestDefinitionClass)
    • description (Optional[str])

property category : str

Category of the test

property definition : TestDefinitionClass

Configuration for the Test

property description : None | str

Description of the test

property name : str

The name of the test

TestKeyClass

class datahub.metadata.schema_classes.TestKeyClass(id)

Bases: _Aspect

Key for a Test

  • Parameters:id (str)

property id : str

Unique id for the test

TestResultClass

class datahub.metadata.schema_classes.TestResultClass(test, type, testDefinitionMd5 = None, lastComputed = None)

Bases: DictWrapper

Information about a Test Result

property lastComputed : None | AuditStampClass

The audit stamp of when the result was computed, including the actor who computed it.

property test : str

The urn of the test

property testDefinitionMd5 : None | str

The md5 of the test definition that was used to compute this result. See TestInfo.testDefinition.md5 for more information.

property type : str | TestResultTypeClass

The type of the result

TestResultTypeClass

class datahub.metadata.schema_classes.TestResultTypeClass()

Bases: object

FAILURE = 'FAILURE'

SUCCESS = 'SUCCESS'

TestResultsClass

class datahub.metadata.schema_classes.TestResultsClass(failing, passing)

Bases: _Aspect

Information about a Test Result

property failing : List[TestResultClass]

Results that are failing

property passing : List[TestResultClass]

Results that are passing

TextCellClass

class datahub.metadata.schema_classes.TextCellClass(cellId, changeAuditStamps, text, cellTitle = None)

Bases: DictWrapper

Text cell in a Notebook, which will present content in text format

property cellId : str

Unique id for the cell. This id should be globally unique for a Notebook tool even when there are multiple deployments of it. As an example, Notebook URL could be used here for QueryBook such as ‘querybook.com/notebook/773/?cellId=1234’

property cellTitle : None | str

Title of the cell

property changeAuditStamps : ChangeAuditStampsClass

Captures information about who created/last modified/deleted this Notebook cell and when

property text : str

The actual text in a TextCell in a Notebook

TimeBasedRetentionClass

class datahub.metadata.schema_classes.TimeBasedRetentionClass(maxAgeInSeconds)

Bases: DictWrapper

Keep records that are less than X seconds old

  • Parameters:maxAgeInSeconds (int)

property maxAgeInSeconds : int

TimeStampClass

class datahub.metadata.schema_classes.TimeStampClass(time, actor = None)

Bases: DictWrapper

A standard event timestamp

  • Parameters:
    • time (int)
    • actor (Optional[str])

property actor : None | str

Optional: The actor urn involved in the event.

property time : int

When did the event occur

TimeTypeClass

class datahub.metadata.schema_classes.TimeTypeClass()

Bases: DictWrapper

Time field type. This should also be used for datetimes.

TimeWindowClass

class datahub.metadata.schema_classes.TimeWindowClass(startTimeMillis, length)

Bases: DictWrapper

property length : TimeWindowSizeClass

The length of the window.

property startTimeMillis : int

Start time as epoch at UTC.

TimeWindowSizeClass

class datahub.metadata.schema_classes.TimeWindowSizeClass(unit, multiple = None)

Bases: DictWrapper

Defines the size of a time window.

property multiple : int

How many units. Defaults to 1.

property unit : str | CalendarIntervalClass

Interval unit such as minute/hour/day etc.

TrainingDataClass

class datahub.metadata.schema_classes.TrainingDataClass(trainingData)

Bases: _Aspect

Ideally, the MLModel card would contain as much information about the training data as the evaluation data. However, there might be cases where it is not feasible to provide this level of detailed information about the training data. For example, the data may be proprietary, or require a non-disclosure agreement. In these cases, we advocate for basic details about the distributions over groups in the data, as well as any other details that could inform stakeholders on the kinds of biases the model may have encoded.

property trainingData : List[BaseDataClass]

Details on the dataset(s) used for training the MLModel

TransformationTypeClass

class datahub.metadata.schema_classes.TransformationTypeClass()

Bases: object

Type of the transformation involved in generating destination fields from source fields.

BLACKBOX = 'BLACKBOX'

IDENTITY = 'IDENTITY'

UDFTransformerClass

class datahub.metadata.schema_classes.UDFTransformerClass(udf)

Bases: DictWrapper

Field transformation expressed in UDF

  • Parameters:udf (str)

property udf : str

A UDF mentioning how the source fields got transformed to destination field. This is the FQCN(Fully Qualified Class Name) of the udf.

UnionTypeClass

class datahub.metadata.schema_classes.UnionTypeClass(nestedTypes = None)

Bases: DictWrapper

Union field type.

  • Parameters:nestedTypes (Optional[List[str]])

property nestedTypes : None | List[str]

List of types in union type.

UpstreamClass

class datahub.metadata.schema_classes.UpstreamClass(dataset, type, auditStamp = None, created = None, properties = None, query = None)

Bases: DictWrapper

Upstream lineage information about a dataset including the source reporting the lineage

property auditStamp : AuditStampClass

Audit stamp containing who reported the lineage and when.

property created : None | AuditStampClass

Audit stamp containing who created the lineage and when.

property dataset : str

The upstream dataset the lineage points to

property properties : None | Dict[str, str]

A generic properties bag that allows us to store specific information on this graph edge.

property query : None | str

If the lineage is generated by a query, a reference to the query

property type : str | DatasetLineageTypeClass

The type of the lineage

UpstreamLineageClass

class datahub.metadata.schema_classes.UpstreamLineageClass(upstreams, fineGrainedLineages = None)

Bases: _Aspect

Upstream lineage of a dataset

property fineGrainedLineages : None | List[FineGrainedLineageClass]

List of fine-grained lineage information, including field-level lineage

property upstreams : List[UpstreamClass]

List of upstream dataset lineage information

UrnForeignKeyClass

class datahub.metadata.schema_classes.UrnForeignKeyClass(currentFieldPath)

Bases: DictWrapper

If SchemaMetadata fields make any external references and the references are of type com.linkedin.pegasus2avro.common.Urn or any of its children, this model can be used to mark it.

  • Parameters:currentFieldPath (str)

property currentFieldPath : str

Field in hosting (current) SchemaMetadata.

UsageAggregationClass

class datahub.metadata.schema_classes.UsageAggregationClass(bucket, duration, resource, metrics)

Bases: DictWrapper

Usage data for a given resource, rolled up into a bucket.

property bucket : int

Bucket start time in milliseconds

property duration : str | WindowDurationClass

Bucket duration

property metrics : UsageAggregationMetricsClass

Metrics associated with this bucket

property resource : str

Resource associated with these usage stats

UsageAggregationMetricsClass

class datahub.metadata.schema_classes.UsageAggregationMetricsClass(uniqueUserCount = None, users = None, totalSqlQueries = None, topSqlQueries = None, fields = None)

Bases: DictWrapper

Metrics for usage data for a given resource and bucket. Not all fields make sense for all buckets, so every field is optional.

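  • Parameters:
    • uniqueUserCount (Optional[int])
    • users (Optional[List[UserUsageCountsClass]])
    • totalSqlQueries (Optional[int])
    • topSqlQueries (Optional[List[str]])
    • fields (Optional[List[FieldUsageCountsClass]])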

property fields : None | List[FieldUsageCountsClass]

Field-level usage stats

property topSqlQueries : None | List[str]

Frequent SQL queries; mostly makes sense for datasets in SQL databases

property totalSqlQueries : None | int

Total SQL query count

property uniqueUserCount : None | int

Unique user count

property users : None | List[UserUsageCountsClass]

Users within this bucket, with frequency counts

UserUsageCountsClass

class datahub.metadata.schema_classes.UserUsageCountsClass(count, user = None, userEmail = None)

Bases: DictWrapper

Records a single user’s usage counts for a given resource

  • Parameters:
    • count (int)
    • user (Optional[str])
    • userEmail (Optional[str])

property count : int

property user : None | str

property userEmail : None | str

If userEmail is set, we attempt to resolve the user’s urn upon ingest

ValueFrequencyClass

class datahub.metadata.schema_classes.ValueFrequencyClass(value, frequency)

Bases: DictWrapper

  • Parameters:
    • value (str)
    • frequency (int)

property frequency : int

property value : str

VersionBasedRetentionClass

class datahub.metadata.schema_classes.VersionBasedRetentionClass(maxVersions)

Bases: DictWrapper

Keep max N latest records

  • Parameters:maxVersions (int)

property maxVersions : int

VersionInfoClass

class datahub.metadata.schema_classes.VersionInfoClass(version, versionType, customProperties = None, externalUrl = None)

Bases: _Aspect

Information about a Data processing job

  • Parameters:
    • version (str)
    • versionType (str)
    • customProperties (Optional[Dict[str, str]])
    • externalUrl (Optional[str])

property customProperties : Dict[str, str]

Custom property bag.

property externalUrl : None | str

URL where the reference exists

property version : str

The version which can identify a job version, like a commit hash or md5 hash

property versionType : str

The type of the version like git hash or md5 hash

VersionPropertiesClass

class datahub.metadata.schema_classes.VersionPropertiesClass(versionSet, version, sortId, aliases = None, comment = None, versioningScheme = None, sourceCreatedTimestamp = None, metadataCreatedTimestamp = None, isLatest = None)

Bases: _Aspect

Properties about a versioned asset i.e. dataset, ML Model, etc.

property aliases : List[VersionTagClass]

Associated aliases for this versioned asset

property comment : None | str

Comment documenting what this version was created for, changes, or represents

property isLatest : None | bool

Marks whether this version is currently the latest. Set by a side effect and should not be modified by API.

property metadataCreatedTimestamp : None | AuditStampClass

Timestamp reflecting when the metadata for this version was created in DataHub

property sortId : str

Sort identifier that determines where a version lives in the order of the Version Set. What this looks like depends on the Version Scheme. For sort ids generated by DataHub we use an 8-character string representation.

property sourceCreatedTimestamp : None | AuditStampClass

Timestamp reflecting when this asset version was created in the source system.

property version : VersionTagClass

Label for this versioned asset, is unique within a version set

property versionSet : str

The linked Version Set entity that ties multiple versioned assets together

property versioningScheme : str | VersioningSchemeClass

What versioning scheme sortId belongs to. Defaults to a plain string that is lexicographically sorted.

VersionSetKeyClass

class datahub.metadata.schema_classes.VersionSetKeyClass(id, entityType)

Bases: _Aspect

Key for a Version Set entity

  • Parameters:
    • id (str)
    • entityType (str)

property entityType : str

Type of entities included in version set, limits to a single entity type between linked versioned entities

property id : str

ID of the Version Set, generated from platform + asset id / name

VersionSetPropertiesClass

class datahub.metadata.schema_classes.VersionSetPropertiesClass(latest, versioningScheme, customProperties = None)

Bases: _Aspect

  • Parameters:
    • latest (str)
    • versioningScheme (Union[str, VersioningSchemeClass])
    • customProperties (Optional[Dict[str, str]])

property customProperties : Dict[str, str]

Custom property bag.

property latest : str

The latest versioned entity linked to in this version set

property versioningScheme : str | VersioningSchemeClass

What versioning scheme is being utilized for the versioned entities sort criterion. Static once set

VersionTagClass

class datahub.metadata.schema_classes.VersionTagClass(versionTag = None, metadataAttribution = None)

Bases: DictWrapper

A resource-defined string representing the resource state for the purpose of concurrency control

property metadataAttribution : None | MetadataAttributionClass

property versionTag : None | str

VersioningSchemeClass

class datahub.metadata.schema_classes.VersioningSchemeClass()

Bases: object

ALPHANUMERIC_GENERATED_BY_DATAHUB = 'ALPHANUMERIC_GENERATED_BY_DATAHUB'

LEXICOGRAPHIC_STRING = 'LEXICOGRAPHIC_STRING'

ViewPropertiesClass

class datahub.metadata.schema_classes.ViewPropertiesClass(materialized, viewLogic, viewLanguage, formattedViewLogic = None)

Bases: _Aspect

Details about a View. For example, this aspect gets activated when subTypes is view

  • Parameters:
    • materialized (bool)
    • viewLogic (str)
    • viewLanguage (str)
    • formattedViewLogic (Optional[str])

property formattedViewLogic : None | str

The formatted view logic. This is particularly used for SQL sources, where the SQL logic is formatted for better readability, and with dbt, where this contains the compiled SQL logic.

property materialized : bool

Whether the view is materialized

property viewLanguage : str

The view logic language / dialect

property viewLogic : str

The view logic

VolumeAssertionInfoClass

class datahub.metadata.schema_classes.VolumeAssertionInfoClass(type, entity, rowCountTotal = None, rowCountChange = None, incrementingSegmentRowCountTotal = None, incrementingSegmentRowCountChange = None, filter = None)

Bases: DictWrapper

Attributes defining a dataset Volume Assertion

property entity : str

The entity targeted by this Volume check.

property filter : None | DatasetFilterClass

A definition of the specific filters that should be applied, when performing monitoring. If not provided, there is no filter, and the full table is under consideration.

property incrementingSegmentRowCountChange : None | IncrementingSegmentRowCountChangeClass

Produce FAILURE Assertion Result if the asset’s incrementing segment row count delta does not meet specific requirements. Required if type is ‘INCREMENTING_SEGMENT_ROW_COUNT_CHANGE’

property incrementingSegmentRowCountTotal : None | IncrementingSegmentRowCountTotalClass

Produce FAILURE Assertion Result if the asset’s latest incrementing segment row count total does not meet specific requirements. Required if type is ‘INCREMENTING_SEGMENT_ROW_COUNT_TOTAL’

property rowCountChange : None | RowCountChangeClass

Produce FAILURE Assertion Result if the delta row count of the asset does not meet specific requirements within a given period of time. Required if type is ‘ROW_COUNT_CHANGE’

property rowCountTotal : None | RowCountTotalClass

Produce FAILURE Assertion Result if the row count of the asset does not meet specific requirements. Required if type is ‘ROW_COUNT_TOTAL’

property type : str | VolumeAssertionTypeClass

The type of the volume assertion being monitored.

VolumeAssertionTypeClass

class datahub.metadata.schema_classes.VolumeAssertionTypeClass()

Bases: object

INCREMENTING_SEGMENT_ROW_COUNT_CHANGE = 'INCREMENTING_SEGMENT_ROW_COUNT_CHANGE'

INCREMENTING_SEGMENT_ROW_COUNT_TOTAL = 'INCREMENTING_SEGMENT_ROW_COUNT_TOTAL'

ROW_COUNT_CHANGE = 'ROW_COUNT_CHANGE'

ROW_COUNT_TOTAL = 'ROW_COUNT_TOTAL'

WindowDurationClass

class datahub.metadata.schema_classes.WindowDurationClass()

Bases: object

Enum to define the length of a bucket when doing aggregations

DAY = 'DAY'

HOUR = 'HOUR'

MONTH = 'MONTH'

WEEK = 'WEEK'

YEAR = 'YEAR'

get_schema_type

datahub.metadata.schema_classes.get_schema_type(fullname)
  • Parameters:fullname (str)
  • Return type:RecordSchema