Skip to main content

Test Python

In this guide, you'll learn how to add data quality and validation tests to your Python Components, create dedicated Python Test Components, and implement specialized tests for custom validation logic.

Prerequisites

info

For a comprehensive overview of test types and when to use them, see our Tests concept guide.

Test behavior

Tests accept a severity parameter that can be set to error or warn.

error is the default severity, meaning that failed tests cause the entire Component to fail. To log warnings instead of failing, set severity="warn":

test("not_null", column="id", severity="warn")

Native tests in Transforms

Add native tests to Python Components by including them in the @transform decorator:

from ascend.resources import transform, test, ref

@transform(
    inputs=[ref("source")],
    tests=[
        # Default severity is "error": a failure fails the Component.
        test("not_null", column="id"),
        # "warn" logs the failure but lets processing continue.
        test("count_equal", count=1000, severity="warn"),
    ],
)
def my_transform(source, context):
    """Pass-through transform with native data quality tests attached."""
    return source

Column-level tests

Validate individual columns by specifying the column parameter:

from ascend.resources import transform, test, ref
from ascend.common.formats import ibis_table

@transform(
    inputs=[ref("raw_customers")],
    materialized="table",
    tests=[
        # Column-level tests: each names the column it validates.
        test("not_null", column="customer_id"),
        test("not_empty", column="customer_id"),
        test("unique", column="email"),
        test("in_range", column="age", min=0, max=120),
        test("in_set", column="status", values=["active", "inactive", "pending"]),
    ],
)
def validated_customers(raw_customers: ibis_table, context) -> ibis_table:
    """Pass-through transform whose column-level tests validate customer data."""
    return raw_customers

Component-level tests

Validate the entire dataset by omitting the column parameter:

from ascend.resources import transform, test, ref
from ascend.common.formats import ibis_table

@transform(
    inputs=[ref("orders")],
    materialized="table",
    tests=[
        # Component-level tests: no `column` parameter, so each test
        # validates the dataset as a whole.
        test("count_greater_than", count=0),
        test("count_less_than", count=1000000),
        test("combination_unique", columns=["order_id", "line_item_id"]),
    ],
)
def validated_orders(orders: ibis_table, context) -> ibis_table:
    """Pass-through transform whose Component-level tests validate order counts and key uniqueness."""
    return orders

Generic tests

Generic tests are reusable test functions that can be applied to any Component. Define them once and reference them in multiple places.

Define a generic test

Create a Python file with the @generic_test decorator:

src/tests/custom_tests.py
from ascend.resources import TestResult, generic_test

@generic_test
def min_row_count(context, component, threshold: int):
    """Validates that component has at least threshold rows.

    Returns a failing TestResult (with an explanatory message) when the
    row count is below ``threshold``, otherwise a passing one.
    """
    count = component.count().execute()

    if count < threshold:
        return TestResult.empty(
            "min_row_count",
            False,
            f"Row count {count} is below threshold {threshold}",
        )
    return TestResult.empty("min_row_count", True)


@generic_test
def column_cardinality(context, component, column: str, min_distinct: int, max_distinct: int):
    """Validates that a column has between min and max distinct values.

    Both bounds are inclusive: a distinct count equal to ``min_distinct``
    or ``max_distinct`` passes.
    """
    distinct_count = component[column].nunique().execute()

    if distinct_count < min_distinct or distinct_count > max_distinct:
        return TestResult.empty(
            "column_cardinality",
            False,
            f"Column {column} has {distinct_count} distinct values, expected between {min_distinct} and {max_distinct}",
        )
    return TestResult.empty("column_cardinality", True)

Reference generic tests

Reference generic tests in your Component's YAML configuration:

component:
  transform:
    python:
      entrypoint: my_project.transforms.process_data
  # NOTE(review): nesting reconstructed from flattened source — `tests`
  # placed as a sibling of `transform`; confirm against the schema.
  tests:
    component:
      - python:
          entrypoint: my_project.tests.custom_tests.min_row_count
          params:
            threshold: 100
      - python:
          entrypoint: my_project.tests.custom_tests.column_cardinality
          params:
            column: category
            min_distinct: 5
            max_distinct: 50

Singular tests

Singular tests validate data across one or more Components. Use them for cross-component validation, referential integrity checks, and complex business rules.

Basic singular test

tests/validate_order_totals.py
from ascend.resources import TestResult, ref, singular_test

@singular_test(
    inputs=[ref("orders"), ref("order_items")],
    severity="error",
)
def validate_order_totals(context, orders, order_items):
    """Validates that order totals match sum of line items.

    Yields a failing TestResult with up to 10 sample rows when any
    order's stored total disagrees with the sum of its line items.
    """

    # Calculate expected totals from line items. The sum must be taken
    # inside agg() so it aggregates per order_id group — a table-level
    # .sum() after agg() would not produce per-order totals.
    line_totals = order_items.group_by("order_id").agg(
        calculated_total=(order_items["price"] * order_items["quantity"]).sum()
    )

    # Join with orders and find mismatches
    comparison = orders.join(line_totals, "order_id")
    mismatches = comparison.filter(
        comparison["total"] != comparison["calculated_total"]
    )

    mismatch_count = mismatches.count().execute()

    if mismatch_count > 0:
        yield TestResult(
            "order_total_mismatch",
            False,
            mismatches.limit(10),  # Sample of failing rows
            f"Found {mismatch_count} orders with incorrect totals",
        )
    else:
        yield TestResult.empty("order_total_mismatch", True)

Referential integrity test

tests/validate_foreign_keys.py
from ascend.resources import TestResult, ref, singular_test

@singular_test(
    inputs=[ref("order_items"), ref("products")],
    severity="error",
)
def validate_product_references(context, order_items, products):
    """Validates that all product_ids in order_items exist in products.

    Uses an anti-join to find order items whose product_id has no match
    in the products table; yields a failing result with sample rows.
    """

    # anti_join keeps only order_items rows with NO matching product_id
    orphaned = order_items.anti_join(products, "product_id")
    orphan_count = orphaned.count().execute()

    if orphan_count > 0:
        yield TestResult(
            "orphaned_product_refs",
            False,
            orphaned.select("order_item_id", "product_id").limit(10),
            f"Found {orphan_count} order items referencing non-existent products",
        )
    else:
        yield TestResult.empty("orphaned_product_refs", True)

Multiple validations in one test

Singular tests can yield multiple test results:

tests/comprehensive_validation.py
from ascend.resources import TestResult, ref, singular_test

@singular_test(inputs=[ref("customers")], severity="error")
def validate_customer_data(context, customers):
    """Runs multiple validations on customer data.

    Demonstrates that a singular test may yield several TestResults —
    one per validation — from a single function.
    """

    # Check for missing contact info: fail rows lacking BOTH email and phone
    missing_contact = customers.filter(
        customers["email"].isnull() & customers["phone"].isnull()
    )
    if missing_contact.count().execute() > 0:
        yield TestResult(
            "missing_contact",
            False,
            missing_contact.limit(10),
            "Customers missing both email and phone",
        )
    else:
        yield TestResult.empty("missing_contact", True)

    # Check for duplicate emails
    # NOTE(review): assumes grouped .count() yields a "count" column —
    # confirm against the ibis version in use.
    email_counts = customers.group_by("email").count()
    duplicates = email_counts.filter(email_counts["count"] > 1)
    if duplicates.count().execute() > 0:
        yield TestResult(
            "duplicate_emails",
            False,
            duplicates.limit(10),
            "Found duplicate email addresses",
        )
    else:
        yield TestResult.empty("duplicate_emails", True)

Asset tests

Asset tests run during the execution of a Read Component, validating data as it's ingested. Use them for source data validation.

components/customers_read.py
from ascend.resources import TestResult, asset_test, read

@read(connection="warehouse", table="raw_customers")
def customers(context):
    """Read Component: ingests the raw_customers table from the warehouse connection."""
    return context.connection.read("raw_customers")


@asset_test(severity="error")
def validate_customer_source(context, customers):
    """Validates source customer data during read.

    Runs as an asset test during ingestion: checks for null customer IDs
    and for emails that do not match a minimal "x@y.z" shape.
    """

    # Check for required fields
    null_ids = customers.filter(customers["customer_id"].isnull())
    if null_ids.count().execute() > 0:
        yield TestResult(
            "null_customer_ids",
            False,
            null_ids.limit(10),
            "Found customers with null IDs in source",
        )
    else:
        yield TestResult.empty("null_customer_ids", True)

    # Validate email format (coarse check: must contain "@" and a ".")
    invalid_emails = customers.filter(
        ~customers["email"].like("%@%.%")
    )
    if invalid_emails.count().execute() > 0:
        yield TestResult(
            "invalid_email_format",
            False,
            invalid_emails.select("customer_id", "email").limit(10),
            "Found customers with invalid email format",
        )
    else:
        yield TestResult.empty("invalid_email_format", True)

Test results

Test functions return TestResult objects:

from ascend.resources import TestResult

# Passing test: name and a True status, no sample rows or message
TestResult.empty("test_name", True)

# Failing test with message (still no sample rows)
TestResult.empty("test_name", False, "Error description")

# Failing test with sample data: third argument carries failing rows for debugging
TestResult("test_name", False, failing_rows_dataframe, "Error description")

Test result states:

  • PASS: Test passed
  • FAIL_WARN: Failed with severity="warn" (processing continues)
  • FAIL_ERROR: Failed with severity="error" (halts processing)

Best practices

  1. Use appropriate severity: Set severity="error" for critical validations that should halt processing, severity="warn" for informational checks
  2. Return sample data: Include sample failing rows in test results to aid debugging
  3. Keep tests focused: Each test should validate one specific condition
  4. Use descriptive names: Test names appear in results; make them clear and actionable
  5. Test at the right level: Use column tests for value validation, Component tests for aggregations, singular tests for cross-component checks

Next steps