Skip to main content
The official Python SDK for the Data Legion API. Includes sync and async clients, Pydantic v2 response models, and full type hints.

Installation

pip install datalegion
Requires Python 3.10+.

Authentication

import os
# Publish the API key through the process environment before the client is built.
os.environ["DATALEGION_API_KEY"] = "legion_..."

from datalegion import DataLegion
# No api_key argument is passed here; presumably the client falls back to
# DATALEGION_API_KEY from the environment — confirm against the SDK reference.
client = DataLegion()

Person

Enrichment

Look up a person by email, phone, name, LinkedIn, or other identifiers.
from datalegion import DataLegion

client = DataLegion(api_key="legion_...")

# Look the person up by email address.
lookup_email = "jane.doe@example.com"
person = client.person.enrich(email=lookup_email)

# Trailing comments show example values for each attribute.
print(person.full_name)          # "Jane Doe"
print(person.job_title)          # "VP of Engineering"
print(person.company_name)       # "Acme Corp"
print(person.city, person.state) # "San Francisco" "California"
Return multiple matches with confidence scores by setting multiple_results=True.
# Identify the person by name and employer rather than by a unique identifier.
identity = {"first_name": "Jane", "last_name": "Doe", "company": "Acme"}

# multiple_results=True asks for a collection of candidate matches instead of
# a single person record.
matches = client.person.enrich(
    **identity,
    multiple_results=True,
    limit=5,
    min_confidence="high",
)

print(f"Found {matches.total} matches")
for match in matches.matches:
    # Each candidate exposes the person record plus metadata about the match.
    meta = match.match_metadata
    print(f"{match.person.full_name} - {meta.match_confidence}")
Search

Query people using SQL WHERE syntax.
# The filter is written the way the body of a SQL WHERE clause would be.
where_clause = "job_title ILIKE '%engineer%' AND city = 'San Francisco'"
results = client.person.search(query=where_clause, limit=25)

for match in results.matches:
    print(f"{match.person.full_name} - {match.person.job_title}")

Discovery

Find people using natural language.
# Describe the people you are looking for in plain language.
prompt = "product managers at AI startups in New York"
results = client.person.discover(query=prompt, limit=10)

# After the call, the SQL generated from the prompt can be read off the client.
print(f"Generated query: {client.generated_query}")

for match in results.matches:
    print(f"{match.person.full_name} at {match.person.company_name}")

Company

Enrichment

Look up a company by domain, name, ticker, or LinkedIn.
# Look the company up by its web domain.
company_domain = "google.com"
company = client.company.enrich(domain=company_domain)

# Trailing comments show example values for each attribute.
print(company.name)                         # CleanedRaw object
print(company.industry)                     # "Internet"
print(company.legion_employee_count)        # 182000
print(company.legion_employee_growth_rate)  # {"1m": 0.02, "3m": 0.05, ...}

Search

Query companies using SQL WHERE syntax.
# The filter is written the way the body of a SQL WHERE clause would be.
where_clause = "industry = 'Artificial Intelligence' AND legion_employee_count > 50"
results = client.company.search(query=where_clause, limit=20)

for match in results.matches:
    print(f"{match.company.name} - {match.company.industry}")

Discovery

Find companies using natural language.
# Describe the companies you are looking for in plain language.
prompt = "fintech companies in London with over 200 employees"
results = client.company.discover(query=prompt, limit=10)

for match in results.matches:
    print(f"{match.company.name} - {match.company.legion_employee_count} employees")

Utility Endpoints

Utility endpoints are free and don’t consume credits.

Clean Fields

# Deliberately messy inputs: stray whitespace, mixed case, and a full URL.
raw_fields = {
    "email": "  JANE.DOE+work@GMAIL.COM  ",
    "phone": "(555) 123-4567",
    "domain": "https://www.Google.com/about",
}
cleaned = client.utility.clean(fields=raw_fields)

# Walk the per-field results and show each value before and after cleaning.
for field, result in cleaned.results.items():
    print(f"{field}: {result.original} -> {result.cleaned}")

Hash Email

hashed = client.utility.hash_email(email="jane@example.com")

print(hashed.normalized_email)    # "jane@example.com"

# The digests are looked up by algorithm name.
digests = hashed.hashes
print(digests["sha256"])          # SHA-256 hash
print(digests["md5"])             # MD5 hash

Validate Data

# Values to check; the email is intentionally malformed.
candidate = {
    "email": "not-an-email",
    "phone": "+15551234567",
    "company": "Google",
}
result = client.utility.validate(**candidate)

# Report each problem only when validation did not pass.
if not result.valid:
    for error in result.errors:
        print(f"{error.field}: {error.error}")

Async Client

The AsyncDataLegion client has the same interface as DataLegion, but every method is async and must be awaited.
import asyncio
from datalegion import AsyncDataLegion


async def main():
    """Run one awaited lookup, then two lookups concurrently."""
    async with AsyncDataLegion(api_key="legion_...") as client:
        # A single request: await the call directly.
        person = await client.person.enrich(email="jane@example.com")
        print(person.full_name)

        # Concurrent requests: create the coroutines first, then await them together.
        person_lookup = client.person.enrich(email="jane@example.com")
        company_lookup = client.company.enrich(domain="google.com")
        results = await asyncio.gather(person_lookup, company_lookup)


asyncio.run(main())

Response Metadata

After each request, metadata from that response is available as attributes on the client.
lookup_email = "jane@example.com"
person = client.person.enrich(email=lookup_email)

# Read the metadata off the client once the call has returned.
print(client.request_id)            # Unique request ID
print(client.credits_used)          # Credits consumed
print(client.credits_remaining)     # Remaining balance
print(client.rate_limit_remaining)  # Requests left in window
print(client.generated_query)       # SQL from discover endpoints

Field Filtering

Control which fields are returned in the response.
# All three calls look up the same person; only the field filter differs.
# Each filter is a single comma-separated string of field names.
lookup_email = "jane@example.com"

# Only return specific fields
person = client.person.enrich(
    email=lookup_email,
    include_fields="full_name,job_title,company_name,linkedin_url",
)

# Exclude fields
person = client.person.enrich(
    email=lookup_email,
    exclude_fields="phones,locations,education",
)

# Require fields (skip records missing these)
person = client.person.enrich(
    email=lookup_email,
    required_fields="work_email,mobile_phone",
)

Error Handling

The SDK raises typed exceptions for different error conditions.
from datalegion import (
    DataLegion,
    AuthenticationError,
    InsufficientCreditsError,
    ValidationError,
    RateLimitError,
    APIError,
)

client = DataLegion(api_key="legion_...")

# Each failure mode has its own exception type, so every case can be handled
# separately. Only the call that can fail sits inside the try block.
try:
    person = client.person.enrich(email="jane@example.com")
except AuthenticationError:
    # 401 - Invalid or missing API key
    print("Check your API key")
except InsufficientCreditsError as e:
    # 402 - Out of credits
    print(f"Out of credits: {e.message}")
except ValidationError as e:
    # 422 - Invalid request parameters
    print(f"Invalid input: {e.details}")
except RateLimitError:
    # 429 - Rate limit exceeded
    # The retry delay is read from the client, so the exception is not bound.
    print(f"Rate limited, retry after {client.retry_after}s")
except APIError as e:
    # 5xx - Server error
    print(f"Server error ({e.status_code}): {e.message}")

Configuration

# api_key  - Or set DATALEGION_API_KEY
# base_url - the value shown is the default
# timeout  - seconds; the value shown (60) is the default
client = DataLegion(
    api_key="legion_...",
    base_url="https://api.datalegion.ai",
    timeout=60.0,
)
You can also pass a custom httpx client for advanced configuration:
import httpx

# Configure the HTTP layer yourself: a request timeout and an extra header.
correlation_headers = {"X-Correlation-ID": "my-request-123"}
custom_client = httpx.Client(timeout=30.0, headers=correlation_headers)

# Hand the pre-configured httpx client to the SDK.
client = DataLegion(httpx_client=custom_client)

Resources