metadata-integration/java/docs/sdk-v2/container-entity.md
The Container entity represents hierarchical groupings of data assets (databases, schemas, folders, projects). This guide covers container operations in SDK V2.
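Containers organize data assets into hierarchical structures. Common use cases include grouping tables under databases and schemas, datasets under projects, and files under buckets and folders.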
Containers use GUID-based URNs generated from their properties (platform, database, schema, etc.), ensuring deterministic URNs for the same logical container.
Container URNs follow the pattern:
urn:li:container:{guid}
The GUID is generated by hashing a set of properties (platform, database, schema, env, etc.). This ensures that the same logical container always resolves to the same URN.
Example:
// --- Deterministic URN ---
// The URN's GUID is derived from the builder properties, so building the same
// logical container again yields the same URN.
Container database = Container.builder()
.platform("snowflake")
.database("analytics_db")
.env("PROD")
.displayName("Analytics Database")
.build();
String urn = database.getContainerUrn();
// urn:li:container:{guid-based-on-properties}
// --- Database container with a description and qualified name ---
Container database = Container.builder()
.platform("snowflake")
.database("analytics_db")
.env("PROD")
.displayName("Analytics Database")
.description("Production analytics database")
.qualifiedName("prod.snowflake.analytics_db")
.build();
// --- Schema container nested under the database ---
// parentContainer(...) takes the parent's URN, here the database built above.
Container schema = Container.builder()
.platform("snowflake")
.database("analytics_db")
.schema("public")
.env("PROD")
.displayName("Public Schema")
.qualifiedName("prod.snowflake.analytics_db.public")
.parentContainer(database.getContainerUrn())
.build();
// --- Container with custom properties ---
// Custom properties are string key/value pairs; numeric values are passed as strings.
Map<String, String> properties = new HashMap<>();
properties.put("size_gb", "2500");
properties.put("table_count", "150");
properties.put("owner_team", "data_platform");
Container database = Container.builder()
.platform("postgres")
.database("production")
.displayName("Production Database")
.customProperties(properties)
.build();
// --- Container with an external URL ---
Container database = Container.builder()
.platform("bigquery")
.database("analytics")
.displayName("Analytics Database")
.externalUrl("https://console.cloud.google.com/bigquery/project/analytics")
.build();
Containers support explicit parent-child relationships for organizing data assets hierarchically.
Database → Schema hierarchy:
// Two-level hierarchy: the schema repeats the database's platform/database/env
// values, adds schema(...), and names the database's URN as its parent.
// Level 1: Database
Container database = Container.builder()
.platform("postgres")
.database("production")
.env("PROD")
.displayName("Production Database")
.build();
// Level 2: Schema (child of database)
Container schema = Container.builder()
.platform("postgres")
.database("production")
.schema("public")
.env("PROD")
.displayName("Public Schema")
// Links this schema to its parent database.
.parentContainer(database.getContainerUrn())
.build();
Database → Schema → Table Group:
// Three-level hierarchy: each level passes the URN of the level above to parentContainer(...).
// Level 1: Database
Container database = Container.builder()
.platform("snowflake")
.database("analytics")
.displayName("Analytics Database")
.build();
// Level 2: Schema
Container schema = Container.builder()
.platform("snowflake")
.database("analytics")
.schema("public")
.displayName("Public Schema")
.parentContainer(database.getContainerUrn())
.build();
// Level 3: Logical grouping
// NOTE(review): tableGroup uses the same platform/database/schema values as `schema`
// above. If the GUID is hashed only from those properties, both containers would get
// the same URN (and the group would be its own parent) - confirm that displayName or
// qualifiedName contributes to the GUID, or add a distinguishing property.
Container tableGroup = Container.builder()
.platform("snowflake")
.database("analytics")
.schema("public")
.displayName("Customer Tables")
.qualifiedName("analytics.public.customer_group")
.parentContainer(schema.getContainerUrn())
.build();
// --- Parent operations on an existing container ---
// `container` is a Container instance obtained elsewhere (not declared in this snippet).
// Set parent container
container.setContainer("urn:li:container:{parent-guid}");
// Get parent container
String parentUrn = container.getParentContainer();
// Clear parent container
container.clearContainer();
Categorize containers with tags:
// Tags given as plain names, without the urn:li:tag: prefix.
// Presumably the SDK expands a plain name to a full tag URN - confirm.
container.addTag("production");
container.addTag("tier1");
container.addTag("pii");
// Or use full URN
container.addTag("urn:li:tag:critical");
Add owners with different ownership types:
import com.linkedin.common.OwnershipType;
// addOwner takes the owner's URN and an OwnershipType; removeOwner takes only the URN.
// Add technical owner
container.addOwner("urn:li:corpuser:data_platform_team",
OwnershipType.TECHNICAL_OWNER);
// Add data steward
container.addOwner("urn:li:corpuser:analytics_lead",
OwnershipType.DATA_STEWARD);
// Remove owner
container.removeOwner("urn:li:corpuser:data_platform_team");
Associate business glossary terms:
// Terms are referenced by their full glossaryTerm URN.
container.addTerm("urn:li:glossaryTerm:ProductionDatabase");
container.addTerm("urn:li:glossaryTerm:CustomerData");
// Remove term
container.removeTerm("urn:li:glossaryTerm:ProductionDatabase");
Assign container to a domain:
// The domain is referenced by its URN.
container.setDomain("urn:li:domain:Analytics");
// Clear all domains
container.clearDomains();
Set or update container description:
// Updates editableContainerProperties
container.setDescription("Production database for analytics workloads");
// --- Reading container properties ---
// Each getter below returns the corresponding value from the container.
// Display name
String displayName = container.getDisplayName();
// Qualified name
String qualifiedName = container.getQualifiedName();
// Description
String description = container.getDescription();
// External URL
String externalUrl = container.getExternalUrl();
// Custom properties
Map<String, String> customProps = container.getCustomProperties();
// Parent container
String parentUrn = container.getParentContainer();
Snowflake Database and Schema:
// --- Snowflake: database and schema ---
// Database container
Container database = Container.builder()
.platform("snowflake")
.database("analytics")
.env("PROD")
.displayName("Analytics Database")
.description("Primary analytics database")
.build();
// Metadata is attached after build() through chained mutation calls.
database
.addTag("production")
.addTag("analytics")
.addOwner("urn:li:corpuser:data_platform", OwnershipType.TECHNICAL_OWNER)
.setDomain("urn:li:domain:Analytics");
// Schema container
Container schema = Container.builder()
.platform("snowflake")
.database("analytics")
.schema("public")
.env("PROD")
.displayName("Public Schema")
.description("Main schema for analytics tables")
.parentContainer(database.getContainerUrn())
.build();
schema
.addTag("public")
.addOwner("urn:li:corpuser:analytics_team", OwnershipType.TECHNICAL_OWNER)
.setDomain("urn:li:domain:Analytics");
// --- BigQuery: project and dataset ---
// The project is expressed through database(...) and the dataset through schema(...).
// Project container
Container project = Container.builder()
.platform("bigquery")
.database("my-project")
.env("PROD")
.displayName("My GCP Project")
.externalUrl("https://console.cloud.google.com/bigquery/project/my-project")
.build();
// Dataset container
Container dataset = Container.builder()
.platform("bigquery")
.database("my-project")
.schema("analytics")
.env("PROD")
.displayName("Analytics Dataset")
.parentContainer(project.getContainerUrn())
.build();
// --- S3: bucket and folder ---
// The bucket is expressed through database(...) and the folder through schema(...).
// Bucket container
Container bucket = Container.builder()
.platform("s3")
.database("my-data-lake")
.env("PROD")
.displayName("Data Lake Bucket")
.build();
// Folder container
// The folder path and file count are carried as custom properties.
Map<String, String> folderProps = new HashMap<>();
folderProps.put("folder_path", "/raw/customer_data");
folderProps.put("file_count", "1500");
Container folder = Container.builder()
.platform("s3")
.database("my-data-lake")
.schema("raw")
.env("PROD")
.displayName("Customer Data Folder")
.parentContainer(bucket.getContainerUrn())
.customProperties(folderProps)
.build();
All mutation operations return the container instance for method chaining:
// --- Fluent chaining ---
// Build the container first, then chain mutation calls on the built instance.
Container database = Container.builder()
.platform("snowflake")
.database("analytics")
.displayName("Analytics Database")
.build();
database
.addTag("production")
.addTag("tier1")
.addOwner("urn:li:corpuser:data_team", OwnershipType.TECHNICAL_OWNER)
.addOwner("urn:li:corpuser:analytics_lead", OwnershipType.DATA_STEWARD)
.addTerm("urn:li:glossaryTerm:ProductionDatabase")
.setDomain("urn:li:domain:Analytics")
.setDescription("Production analytics database");
// --- Upserting a hierarchy through the client ---
// NOTE: this snippet never closes the client; the complete example calls client.close().
DataHubClientV2 client = DataHubClientV2.builder()
.server("http://localhost:8080")
.build();
// Create hierarchy
Container database = Container.builder()
.platform("snowflake")
.database("analytics")
.displayName("Analytics Database")
.build();
Container schema = Container.builder()
.platform("snowflake")
.database("analytics")
.schema("public")
.displayName("Public Schema")
.parentContainer(database.getContainerUrn())
.build();
// Upsert in order: parent before children
client.entities().upsert(database);
client.entities().upsert(schema);
import com.linkedin.common.OwnershipType;
import datahub.client.v2.DataHubClientV2;
import datahub.client.v2.entity.Container;
import java.util.HashMap;
import java.util.Map;
/**
 * End-to-end example: builds a Snowflake database container and a child schema
 * container, attaches tags, owners, a glossary term and a domain, then upserts
 * both to DataHub (parent first).
 */
public class ContainerExample {
  /**
   * Runs the example against the DataHub server at http://localhost:8080.
   *
   * @param args unused
   * @throws Exception if building the containers, upserting them, or closing the client fails
   */
  public static void main(String[] args) throws Exception {
    DataHubClientV2 client = DataHubClientV2.builder()
        .server("http://localhost:8080")
        .build();

    // try/finally guarantees the client is closed even when building or
    // upserting a container throws; previously close() ran only on success.
    try {
      // Create database container
      Map<String, String> dbProps = new HashMap<>();
      dbProps.put("database_type", "analytics");
      dbProps.put("size_gb", "5000");

      Container database = Container.builder()
          .platform("snowflake")
          .database("analytics_db")
          .env("PROD")
          .displayName("Analytics Database")
          .qualifiedName("prod.snowflake.analytics_db")
          .description("Production analytics database")
          .externalUrl("https://snowflake.example.com/databases/analytics_db")
          .customProperties(dbProps)
          .build();

      database
          .addTag("production")
          .addTag("analytics")
          .addTag("tier1")
          .addOwner("urn:li:corpuser:data_platform", OwnershipType.TECHNICAL_OWNER)
          .addOwner("urn:li:corpuser:analytics_lead", OwnershipType.DATA_STEWARD)
          .addTerm("urn:li:glossaryTerm:ProductionDatabase")
          .setDomain("urn:li:domain:Analytics");

      // Create schema container as a child of the database
      Map<String, String> schemaProps = new HashMap<>();
      schemaProps.put("table_count", "150");
      schemaProps.put("refresh_schedule", "hourly");

      Container schema = Container.builder()
          .platform("snowflake")
          .database("analytics_db")
          .schema("public")
          .env("PROD")
          .displayName("Public Schema")
          .qualifiedName("prod.snowflake.analytics_db.public")
          .description("Main schema for analytics tables")
          .parentContainer(database.getContainerUrn())
          .customProperties(schemaProps)
          .build();

      schema
          .addTag("public")
          .addTag("production-ready")
          .addOwner("urn:li:corpuser:analytics_team", OwnershipType.TECHNICAL_OWNER)
          .setDomain("urn:li:domain:Analytics");

      // Upsert to DataHub: parent before child
      client.entities().upsert(database);
      client.entities().upsert(schema);

      System.out.println("Created container hierarchy:");
      System.out.println(" Database: " + database.getContainerUrn());
      System.out.println(" Schema: " + schema.getContainerUrn());
    } finally {
      client.close();
    }
  }
}