refactor(init_db): remove demo data file and implement structured seeding

- Delete `demo_data.json`; it is replaced by structured seeding logic for better modularity.
- Add support for seeding default agent types and new demo data structure.
- Ensure demo data is only loaded when demo mode is explicitly enabled (`settings.DEMO_MODE`).
- Enhance logging for improved debugging during DB initialization.
This commit is contained in:
2026-01-06 02:34:34 +01:00
parent 1dcf99ee38
commit 92782bcb05
4 changed files with 1096 additions and 424 deletions

View File

@@ -3,27 +3,48 @@
Async database initialization script.
Creates the first superuser if configured and doesn't already exist.
Seeds default agent types (production data) and demo data (when DEMO_MODE is enabled).
"""
import asyncio
import json
import logging
import random
from datetime import UTC, datetime, timedelta
from datetime import UTC, date, datetime, timedelta
from pathlib import Path
from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.core.database import SessionLocal, engine
from app.crud.syndarix.agent_type import agent_type as agent_type_crud
from app.crud.user import user as user_crud
from app.models.organization import Organization
from app.models.syndarix import AgentInstance, AgentType, Issue, Project, Sprint
from app.models.syndarix.enums import (
AgentStatus,
AutonomyLevel,
ClientMode,
IssuePriority,
IssueStatus,
IssueType,
ProjectComplexity,
ProjectStatus,
SprintStatus,
)
from app.models.user import User
from app.models.user_organization import UserOrganization
from app.schemas.syndarix import AgentTypeCreate
from app.schemas.users import UserCreate
# Module-level logger; all seeding progress and errors in this script go through it.
logger = logging.getLogger(__name__)
# Data file paths
# DATA_DIR is the "data" directory next to this file's parent directory
# (i.e. <directory containing this file>/../data).
DATA_DIR = Path(__file__).parent.parent / "data"
# Read by load_default_agent_types(); if the file is missing that function
# logs a warning and returns without creating anything.
DEFAULT_AGENT_TYPES_PATH = DATA_DIR / "default_agent_types.json"
# Read by load_demo_data(); if the file is missing that function logs a
# warning and returns without creating anything.
DEMO_DATA_PATH = DATA_DIR / "demo_data.json"
async def init_db() -> User | None:
"""
@@ -54,28 +75,29 @@ async def init_db() -> User | None:
if existing_user:
logger.info(f"Superuser already exists: {existing_user.email}")
return existing_user
else:
# Create superuser if doesn't exist
user_in = UserCreate(
email=superuser_email,
password=superuser_password,
first_name="Admin",
last_name="User",
is_superuser=True,
)
# Create superuser if doesn't exist
user_in = UserCreate(
email=superuser_email,
password=superuser_password,
first_name="Admin",
last_name="User",
is_superuser=True,
)
existing_user = await user_crud.create(session, obj_in=user_in)
await session.commit()
await session.refresh(existing_user)
logger.info(f"Created first superuser: {existing_user.email}")
user = await user_crud.create(session, obj_in=user_in)
await session.commit()
await session.refresh(user)
# ALWAYS load default agent types (production data)
await load_default_agent_types(session)
logger.info(f"Created first superuser: {user.email}")
# Create demo data if in demo mode
# Only load demo data if in demo mode
if settings.DEMO_MODE:
await load_demo_data(session)
return user
return existing_user
except Exception as e:
await session.rollback()
@@ -88,26 +110,89 @@ def _load_json_file(path: Path):
return json.load(f)
async def load_demo_data(session):
"""Load demo data from JSON file."""
demo_data_path = Path(__file__).parent / "core" / "demo_data.json"
if not demo_data_path.exists():
logger.warning(f"Demo data file not found: {demo_data_path}")
async def load_default_agent_types(session: AsyncSession) -> None:
    """
    Load default agent types from the JSON file at DEFAULT_AGENT_TYPES_PATH.

    These are production defaults - created only if they don't exist, never overwritten.
    This allows users to customize agent types without worrying about server restarts.

    If the file does not exist, a warning is logged and nothing is created.

    Args:
        session: Async database session passed to the agent type CRUD helpers
            for the per-slug existence check and for creation.

    Raises:
        Exception: Any error raised while reading the file or while looking up
            or creating an agent type is logged and then re-raised unchanged.
    """
    if not DEFAULT_AGENT_TYPES_PATH.exists():
        logger.warning(
            f"Default agent types file not found: {DEFAULT_AGENT_TYPES_PATH}"
        )
        return

    try:
        # Use asyncio.to_thread to avoid blocking the event loop
        data = await asyncio.to_thread(_load_json_file, DEFAULT_AGENT_TYPES_PATH)

        # Each entry is indexed by key below, so the file is expected to hold
        # a sequence of objects, one per agent type.
        for agent_type_data in data:
            slug = agent_type_data["slug"]

            # Check if agent type already exists; existing rows are left
            # untouched so user customizations survive restarts.
            existing = await agent_type_crud.get_by_slug(session, slug=slug)
            if existing:
                logger.debug(f"Agent type already exists: {agent_type_data['name']}")
                continue

            # Create the agent type. "name", "slug", "personality_prompt" and
            # "primary_model" are required keys (a missing one raises
            # KeyError); every other field falls back to a default.
            agent_type_in = AgentTypeCreate(
                name=agent_type_data["name"],
                slug=slug,
                description=agent_type_data.get("description"),
                expertise=agent_type_data.get("expertise", []),
                personality_prompt=agent_type_data["personality_prompt"],
                primary_model=agent_type_data["primary_model"],
                fallback_models=agent_type_data.get("fallback_models", []),
                model_params=agent_type_data.get("model_params", {}),
                mcp_servers=agent_type_data.get("mcp_servers", []),
                tool_permissions=agent_type_data.get("tool_permissions", {}),
                is_active=agent_type_data.get("is_active", True),
            )
            await agent_type_crud.create(session, obj_in=agent_type_in)
            logger.info(f"Created default agent type: {agent_type_data['name']}")

        logger.info("Default agent types loaded successfully")
    except Exception as e:
        # Log for diagnostics, then propagate so the caller can decide how to
        # handle the failure.
        logger.error(f"Error loading default agent types: {e}")
        raise
async def load_demo_data(session: AsyncSession) -> None:
"""
Load demo data from JSON file.
Only runs when DEMO_MODE is enabled. Creates demo organizations, users,
projects, sprints, agent instances, and issues.
"""
if not DEMO_DATA_PATH.exists():
logger.warning(f"Demo data file not found: {DEMO_DATA_PATH}")
return
try:
data = await asyncio.to_thread(_load_json_file, DEMO_DATA_PATH)
# Build lookup maps for FK resolution
org_map: dict[str, Organization] = {}
user_map: dict[str, User] = {}
project_map: dict[str, Project] = {}
sprint_map: dict[str, Sprint] = {} # key: "project_slug:sprint_number"
agent_type_map: dict[str, AgentType] = {}
agent_instance_map: dict[
str, AgentInstance
] = {} # key: "project_slug:agent_name"
# ========================
# 1. Create Organizations
# ========================
for org_data in data.get("organizations", []):
org_result = await session.execute(
select(Organization).where(Organization.slug == org_data["slug"])
)
existing_org = org_result.scalar_one_or_none()
if not existing_org:
org = Organization(
@@ -117,29 +202,20 @@ async def load_demo_data(session):
is_active=True,
)
session.add(org)
await session.flush() # Flush to get ID
org_map[org.slug] = org
await session.flush()
org_map[str(org.slug)] = org
logger.info(f"Created demo organization: {org.name}")
else:
# We can't easily get the ORM object from raw SQL result for map without querying again or mapping
# So let's just query it properly if we need it for relationships
# But for simplicity in this script, let's just assume we created it or it exists.
# To properly map for users, we need the ID.
# Let's use a simpler approach: just try to create, if slug conflict, skip.
pass
org_map[str(existing_org.slug)] = existing_org
# Re-query all orgs to build map for users
result = await session.execute(select(Organization))
orgs = result.scalars().all()
org_map = {org.slug: org for org in orgs}
# Create Users
# ========================
# 2. Create Users
# ========================
for user_data in data.get("users", []):
existing_user = await user_crud.get_by_email(
session, email=user_data["email"]
)
if not existing_user:
# Create user
user_in = UserCreate(
email=user_data["email"],
password=user_data["password"],
@@ -151,17 +227,13 @@ async def load_demo_data(session):
user = await user_crud.create(session, obj_in=user_in)
# Randomize created_at for demo data (last 30 days)
# This makes the charts look more realistic
days_ago = random.randint(0, 30) # noqa: S311
random_time = datetime.now(UTC) - timedelta(days=days_ago)
# Add some random hours/minutes variation
random_time = random_time.replace(
hour=random.randint(0, 23), # noqa: S311
minute=random.randint(0, 59), # noqa: S311
)
# Update the timestamp and is_active directly in the database
# We do this to ensure the values are persisted correctly
await session.execute(
text(
"UPDATE users SET created_at = :created_at, is_active = :is_active WHERE id = :user_id"
@@ -174,7 +246,7 @@ async def load_demo_data(session):
)
logger.info(
f"Created demo user: {user.email} (created {days_ago} days ago, active={user_data.get('is_active', True)})"
f"Created demo user: {user.email} (created {days_ago} days ago)"
)
# Add to organization if specified
@@ -182,19 +254,219 @@ async def load_demo_data(session):
role = user_data.get("role")
if org_slug and org_slug in org_map and role:
org = org_map[org_slug]
# Check if membership exists (it shouldn't for new user)
member = UserOrganization(
user_id=user.id, organization_id=org.id, role=role
)
session.add(member)
logger.info(f"Added {user.email} to {org.name} as {role}")
user_map[str(user.email)] = user
else:
logger.info(f"Demo user already exists: {existing_user.email}")
user_map[str(existing_user.email)] = existing_user
logger.debug(f"Demo user already exists: {existing_user.email}")
await session.flush()
# ========================
# 3. Load Agent Types Map (for FK resolution)
# ========================
agent_types_result = await session.execute(select(AgentType))
for at in agent_types_result.scalars().all():
agent_type_map[str(at.slug)] = at
# ========================
# 4. Create Projects
# ========================
for project_data in data.get("projects", []):
project_result = await session.execute(
select(Project).where(Project.slug == project_data["slug"])
)
existing_project = project_result.scalar_one_or_none()
if not existing_project:
# Resolve owner email to user ID
owner_id = None
owner_email = project_data.get("owner_email")
if owner_email and owner_email in user_map:
owner_id = user_map[owner_email].id
project = Project(
name=project_data["name"],
slug=project_data["slug"],
description=project_data.get("description"),
owner_id=owner_id,
autonomy_level=AutonomyLevel(
project_data.get("autonomy_level", "milestone")
),
status=ProjectStatus(project_data.get("status", "active")),
complexity=ProjectComplexity(
project_data.get("complexity", "medium")
),
client_mode=ClientMode(project_data.get("client_mode", "auto")),
settings=project_data.get("settings", {}),
)
session.add(project)
await session.flush()
project_map[str(project.slug)] = project
logger.info(f"Created demo project: {project.name}")
else:
project_map[str(existing_project.slug)] = existing_project
logger.debug(f"Demo project already exists: {existing_project.name}")
# ========================
# 5. Create Sprints
# ========================
for sprint_data in data.get("sprints", []):
project_slug = sprint_data["project_slug"]
sprint_number = sprint_data["number"]
sprint_key = f"{project_slug}:{sprint_number}"
if project_slug not in project_map:
logger.warning(f"Project not found for sprint: {project_slug}")
continue
sprint_project = project_map[project_slug]
# Check if sprint exists
sprint_result = await session.execute(
select(Sprint).where(
Sprint.project_id == sprint_project.id,
Sprint.number == sprint_number,
)
)
existing_sprint = sprint_result.scalar_one_or_none()
if not existing_sprint:
sprint = Sprint(
project_id=sprint_project.id,
name=sprint_data["name"],
number=sprint_number,
goal=sprint_data.get("goal"),
start_date=date.fromisoformat(sprint_data["start_date"]),
end_date=date.fromisoformat(sprint_data["end_date"]),
status=SprintStatus(sprint_data.get("status", "planned")),
planned_points=sprint_data.get("planned_points"),
)
session.add(sprint)
await session.flush()
sprint_map[sprint_key] = sprint
logger.info(
f"Created demo sprint: {sprint.name} for {sprint_project.name}"
)
else:
sprint_map[sprint_key] = existing_sprint
logger.debug(f"Demo sprint already exists: {existing_sprint.name}")
# ========================
# 6. Create Agent Instances
# ========================
for agent_data in data.get("agent_instances", []):
project_slug = agent_data["project_slug"]
agent_type_slug = agent_data["agent_type_slug"]
agent_name = agent_data["name"]
agent_key = f"{project_slug}:{agent_name}"
if project_slug not in project_map:
logger.warning(f"Project not found for agent: {project_slug}")
continue
if agent_type_slug not in agent_type_map:
logger.warning(f"Agent type not found: {agent_type_slug}")
continue
agent_project = project_map[project_slug]
agent_type = agent_type_map[agent_type_slug]
# Check if agent instance exists (by name within project)
agent_result = await session.execute(
select(AgentInstance).where(
AgentInstance.project_id == agent_project.id,
AgentInstance.name == agent_name,
)
)
existing_agent = agent_result.scalar_one_or_none()
if not existing_agent:
agent_instance = AgentInstance(
project_id=agent_project.id,
agent_type_id=agent_type.id,
name=agent_name,
status=AgentStatus(agent_data.get("status", "idle")),
current_task=agent_data.get("current_task"),
)
session.add(agent_instance)
await session.flush()
agent_instance_map[agent_key] = agent_instance
logger.info(
f"Created demo agent: {agent_name} ({agent_type.name}) "
f"for {agent_project.name}"
)
else:
agent_instance_map[agent_key] = existing_agent
logger.debug(f"Demo agent already exists: {existing_agent.name}")
# ========================
# 7. Create Issues
# ========================
for issue_data in data.get("issues", []):
project_slug = issue_data["project_slug"]
if project_slug not in project_map:
logger.warning(f"Project not found for issue: {project_slug}")
continue
issue_project = project_map[project_slug]
# Check if issue exists (by title within project - simple heuristic)
issue_result = await session.execute(
select(Issue).where(
Issue.project_id == issue_project.id,
Issue.title == issue_data["title"],
)
)
existing_issue = issue_result.scalar_one_or_none()
if not existing_issue:
# Resolve sprint
sprint_id = None
sprint_number = issue_data.get("sprint_number")
if sprint_number:
sprint_key = f"{project_slug}:{sprint_number}"
if sprint_key in sprint_map:
sprint_id = sprint_map[sprint_key].id
# Resolve assigned agent
assigned_agent_id = None
assigned_agent_name = issue_data.get("assigned_agent_name")
if assigned_agent_name:
agent_key = f"{project_slug}:{assigned_agent_name}"
if agent_key in agent_instance_map:
assigned_agent_id = agent_instance_map[agent_key].id
issue = Issue(
project_id=issue_project.id,
sprint_id=sprint_id,
type=IssueType(issue_data.get("type", "task")),
title=issue_data["title"],
body=issue_data.get("body", ""),
status=IssueStatus(issue_data.get("status", "open")),
priority=IssuePriority(issue_data.get("priority", "medium")),
labels=issue_data.get("labels", []),
story_points=issue_data.get("story_points"),
assigned_agent_id=assigned_agent_id,
)
session.add(issue)
logger.info(f"Created demo issue: {issue.title[:50]}...")
else:
logger.debug(
f"Demo issue already exists: {existing_issue.title[:50]}..."
)
await session.commit()
logger.info("Demo data loaded successfully")
except Exception as e:
await session.rollback()
logger.error(f"Error loading demo data: {e}")
raise
@@ -210,12 +482,12 @@ async def main():
try:
user = await init_db()
if user:
print("Database initialized successfully")
print(f"Superuser: {user.email}")
print("Database initialized successfully")
print(f"Superuser: {user.email}")
else:
print("Failed to initialize database")
print("Failed to initialize database")
except Exception as e:
print(f"Error initializing database: {e}")
print(f"Error initializing database: {e}")
raise
finally:
# Close the engine