diff --git a/.gitignore b/.gitignore
index c803427..16bd050 100644
--- a/.gitignore
+++ b/.gitignore
@@ -158,3 +158,9 @@ dmypy.json
# PyCharm
.idea/
+
+# pnpm
+pnpm-lock.yaml
+
+.yarn/*
+!.yarn/patches
diff --git a/backend/app/__init__.py b/backend/app/__init__.py
index 88e9dfd..1e3688b 100644
--- a/backend/app/__init__.py
+++ b/backend/app/__init__.py
@@ -6,6 +6,7 @@
from redis import asyncio as aioredis
from . import auth, messages
+from .ai_helpers.routers import router as ai_helpers_router
from .config import BACKEND_ROOT_PATH, DOMAIN, REDIS_HOST
from .experiments.routers import router as experiments_router
from .users.routers import (
@@ -62,5 +63,6 @@ def create_app() -> FastAPI:
app.include_router(users_router)
app.include_router(messages.router)
app.include_router(workspaces_router)
+ app.include_router(ai_helpers_router)
return app
diff --git a/backend/app/ai_helpers/routers.py b/backend/app/ai_helpers/routers.py
new file mode 100644
index 0000000..4fd142a
--- /dev/null
+++ b/backend/app/ai_helpers/routers.py
@@ -0,0 +1,488 @@
+import json
+import os
+from typing import Any, Optional
+
+from fastapi import APIRouter, HTTPException
+from google import genai
+from google.genai import types
+from pydantic import BaseModel
+
+router = APIRouter(prefix="/ai_helpers", tags=["AI Helpers"])
+
+
+class ExperimentAIGenerateRequest(BaseModel):
+ """Request model for experiment generation.
+
+ The three fields are interpolated into the Gemini user prompt built by
+ ``generate_experiment_fields_logic`` and are forwarded to the arm/context
+ suggestion requests by ``generate_whole_experiment``.
+ """
+
+ # Free-text goal of the experiment (prompt line "Goal").
+ goal: str
+ # Free-text outcome to observe (prompt line "Outcome").
+ outcome: str
+ # Requested number of variants; this model does not range-check it.
+ num_variants: int
+
+
+class ExperimentAIGenerateResponse(BaseModel):
+ """Response model for experiment generation.
+
+ Passed to Gemini as ``response_schema`` and then built from the parsed
+ JSON reply in ``generate_experiment_fields_logic``.
+ """
+
+ # Generated experiment name (the prompt asks for at most 8 words).
+ name: str
+ # Generated experiment description.
+ description: str
+ # The prompt asks for 'mab', 'bayes_ab' or 'cmab'; as a plain ``str`` the
+ # value is not restricted to those options by this model.
+ experiment_type: str
+
+
+class baysABArmsSuggestionRequest(BaseModel):
+ """Request model for Bayesian A/B arms suggestion.
+
+ Every field is interpolated verbatim into the user prompt built by
+ ``suggest_arms``.
+ """
+
+ # Experiment name and description shown to the model.
+ name: str
+ description: str
+ # Experiment method identifier; ``generate_whole_experiment`` passes the
+ # generated ``experiment_type`` here.
+ methodType: str
+ goal: str
+ outcome: str
+ # Number of variants; this model does not range-check it.
+ numVariants: int
+ # Reward type label; ``generate_whole_experiment`` supplies "binary".
+ reward_type: str
+
+
+class CompleteExperimentResponse(BaseModel):
+ """Response model for complete experiment generation.
+
+ Returned by ``generate_whole_experiment``.
+ """
+
+ name: str
+ description: str
+ experiment_type: str
+ # Arm objects as parsed from the Gemini JSON reply; their keys are not
+ # validated by this model.
+ arms: list[dict[str, Any]]
+ # Only populated in the 'cmab' branch of ``generate_whole_experiment``;
+ # stays None for the other experiment types.
+ contexts: Optional[list[dict[str, Any]]] = None
+
+
+class MABArmsSuggestionRequest(BaseModel):
+ """Request model for MAB arms suggestion.
+
+ Every field is interpolated verbatim into the user prompt built by
+ ``suggest_mab_arms``.
+ """
+
+ name: str
+ description: str
+ # Experiment method identifier; ``generate_whole_experiment`` passes the
+ # generated ``experiment_type`` here.
+ methodType: str
+ goal: str
+ outcome: str
+ # Number of variants; this model does not range-check it.
+ numVariants: int
+ # The MAB prompt distinguishes a beta prior (alpha_init/beta_init) from a
+ # normal prior (mu_init/sigma_init); the value itself is not validated.
+ prior_type: str
+ reward_type: str
+
+
+class CMABContextSuggestionRequest(BaseModel):
+ """Request model for CMAB context suggestion.
+
+ Declares the same fields as ``MABArmsSuggestionRequest``; every field is
+ interpolated verbatim into the user prompt built by
+ ``suggest_cmab_contexts``.
+ """
+
+ name: str
+ description: str
+ # Experiment method identifier; ``generate_whole_experiment`` passes the
+ # generated ``experiment_type`` here.
+ methodType: str
+ goal: str
+ outcome: str
+ # Number of variants; this model does not range-check it.
+ numVariants: int
+ # ``generate_whole_experiment`` supplies "normal" / "binary" for these.
+ prior_type: str
+ reward_type: str
+
+
+class CMABArmContext(BaseModel):
+ """Model for CMAB arm context.
+
+ One user context entry, as listed in ``CMABArmsSuggestionRequest.contexts``.
+ """
+
+ name: str
+ description: str
+ # The context-suggestion prompt asks for 'binary' or 'real-valued'; as a
+ # plain ``str`` the value is not restricted by this model.
+ value_type: str
+
+
+class CMABArmsSuggestionRequest(BaseModel):
+ """Request model for CMAB arms suggestion.
+
+ The scalar fields are interpolated verbatim into the user prompt built by
+ ``suggest_cmab_arms``.
+ """
+
+ name: str
+ description: str
+ # Experiment method identifier; ``generate_whole_experiment`` passes the
+ # generated ``experiment_type`` here.
+ methodType: str
+ goal: str
+ outcome: str
+ # Number of variants; this model does not range-check it.
+ numVariants: int
+ prior_type: str
+ reward_type: str
+ # Rendered into the prompt as one "- name (value_type): description"
+ # line per context.
+ contexts: list[CMABArmContext]
+
+
+api_key = os.environ.get("GEMINI_API_KEY")
+
+
+class _MissingGeminiClient:
+    """Placeholder client used when ``GEMINI_API_KEY`` is not configured.
+
+    Raising at import time would abort application start-up and take every
+    other router down with it, and ``HTTPException`` only makes sense while
+    a request is being handled. Any attribute access on this placeholder
+    raises instead, so the problem surfaces as an HTTP 500 on the AI helper
+    endpoints only.
+    """
+
+    def __getattr__(self, name: str) -> Any:
+        raise HTTPException(status_code=500, detail="Gemini API key not set")
+
+
+# Shared Gemini client used by every endpoint in this module.
+client: Any = genai.Client(api_key=api_key) if api_key else _MissingGeminiClient()
+
+
+@router.post("/suggestBaysAB-arms")
+async def suggest_arms(
+    request: baysABArmsSuggestionRequest,
+) -> list[dict[str, Any]]:
+    """Suggest arms for Bayesian A/B test.
+
+    Builds a prompt from the experiment details in ``request`` and asks
+    Gemini for a JSON array of arms. The prompt requests the keys name,
+    description, mu_init and sigma_init; the reply's keys are not validated.
+
+    Args:
+        request: Experiment details interpolated into the user prompt.
+
+    Returns:
+        The JSON array parsed from the model reply.
+
+    Raises:
+        HTTPException: 500 if the Gemini call fails, the reply is empty, or
+            the reply is not a JSON array.
+    """
+    system_instruction = (
+        "You are an assistant for a tool that helps social sector "
+        "organizations run digital experiments.\n"
+        "Given the experiment details below, suggest concise names and "
+        "descriptions for each arm (variant) of the experiment.\n"
+        "For Bayesian A/B tests, there are usually two arms: a control "
+        "(existing/baseline) and a treatment (new/changed feature). "
+        "For other experiment types, use the number of variants provided.\n"
+        "For each arm, also suggest reasonable initial values for mu_init "
+        "(mean prior, between 0 and 1 for rates) and sigma_init "
+        "(standard deviation).\n"
+        "Respond ONLY with a valid JSON array. Each array element should be "
+        "an object with keys: name, description, mu_init, sigma_init. "
+        "No explanation, no markdown."
+    )
+
+    user_prompt = (
+        "Experiment details:\n"
+        f"- Name: {request.name}\n"
+        f"- Description: {request.description}\n"
+        f"- Method type: {request.methodType}\n"
+        f"- Goal: {request.goal}\n"
+        f"- Outcome: {request.outcome}\n"
+        f"- Number of variants: {request.numVariants}\n"
+        f"- Reward type: {request.reward_type}\n"
+    )
+
+    try:
+        # Use the SDK's async client: the synchronous call would block the
+        # event loop for the whole Gemini round trip.
+        response = await client.aio.models.generate_content(
+            model="gemini-2.0-flash",
+            contents=[user_prompt],
+            config=types.GenerateContentConfig(
+                system_instruction=system_instruction,
+                max_output_tokens=500,
+                temperature=0.1,
+                response_mime_type="application/json",
+            ),
+        )
+        # ``text`` can be empty/None when the model returns no text parts.
+        if not response.text:
+            raise ValueError("empty response from model")
+        arms = json.loads(response.text)
+        if not isinstance(arms, list):
+            raise ValueError("expected a JSON array of arms")
+        return arms
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Gemini API error: {e}") from e
+
+
+@router.post("/suggestMAB-arms")
+async def suggest_mab_arms(
+    request: MABArmsSuggestionRequest,
+) -> list[dict[str, Any]]:
+    """Suggest arms for Multi-Armed Bandit experiment.
+
+    Builds a prompt from the experiment details in ``request`` and asks
+    Gemini for a JSON array of arms. The prompt requests name, description
+    and prior parameters matching ``prior_type``; the reply's keys are not
+    validated.
+
+    Args:
+        request: Experiment details interpolated into the user prompt.
+
+    Returns:
+        The JSON array parsed from the model reply.
+
+    Raises:
+        HTTPException: 500 if the Gemini call fails, the reply is empty, or
+            the reply is not a JSON array.
+    """
+    system_instruction = (
+        "You are an assistant for a tool that helps social sector "
+        "organizations run digital experiments.\n"
+        "Given the experiment details below, suggest concise names and "
+        "descriptions for each arm (variant) of the experiment.\n"
+        "For multi-armed bandit (MAB) experiments, use the number of "
+        "variants provided.\n"
+        "For each arm, also suggest reasonable initial values for alpha_init "
+        "and beta_init (for beta prior) or mu_init and sigma_init "
+        "(for normal prior), depending on the prior_type.\n"
+        "Respond ONLY with a valid JSON array. Each array element should be "
+        "an object with keys: name, description, and the appropriate prior "
+        "parameters. No explanation, no markdown."
+    )
+
+    user_prompt = (
+        "Experiment details:\n"
+        f"- Name: {request.name}\n"
+        f"- Description: {request.description}\n"
+        f"- Method type: {request.methodType}\n"
+        f"- Goal: {request.goal}\n"
+        f"- Outcome: {request.outcome}\n"
+        f"- Number of variants: {request.numVariants}\n"
+        f"- Prior type: {request.prior_type}\n"
+        f"- Reward type: {request.reward_type}\n"
+    )
+
+    try:
+        # Use the SDK's async client: the synchronous call would block the
+        # event loop for the whole Gemini round trip.
+        response = await client.aio.models.generate_content(
+            model="gemini-2.0-flash",
+            contents=[user_prompt],
+            config=types.GenerateContentConfig(
+                system_instruction=system_instruction,
+                max_output_tokens=500,
+                temperature=0.9,
+                response_mime_type="application/json",
+            ),
+        )
+        # ``text`` can be empty/None when the model returns no text parts.
+        if not response.text:
+            raise ValueError("empty response from model")
+        arms = json.loads(response.text)
+        if not isinstance(arms, list):
+            raise ValueError("expected a JSON array of arms")
+        return arms
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Gemini API error: {e}") from e
+
+
+@router.post("/suggestCMAB-contexts")
+async def suggest_cmab_contexts(
+    request: CMABContextSuggestionRequest,
+) -> list[dict[str, Any]]:
+    """Suggest contexts for Contextual Multi-Armed Bandit experiment.
+
+    Builds a prompt from the experiment details in ``request`` and asks
+    Gemini for a JSON array of user contexts. The prompt requests the keys
+    name, description and value_type; the reply's keys are not validated.
+
+    Args:
+        request: Experiment details interpolated into the user prompt.
+
+    Returns:
+        The JSON array parsed from the model reply.
+
+    Raises:
+        HTTPException: 500 if the Gemini call fails, the reply is empty, or
+            the reply is not a JSON array.
+    """
+    system_instruction = (
+        "You are an assistant for a tool that helps social sector "
+        "organizations run digital experiments.\n"
+        "Given the experiment details below, suggest relevant user contexts "
+        "for a Contextual Bandit (CMAB) experiment.\n"
+        "Contexts are user attributes (e.g., age, location, engagement level) "
+        "that might influence how they respond to different variants.\n"
+        "For each context, provide a concise 'name', a 'description', and "
+        "a 'value_type' ('binary' or 'real-valued').\n"
+        "Respond ONLY with a valid JSON array. Each array element should be "
+        "an object with keys: name, description, value_type. No explanation, "
+        "no markdown."
+    )
+
+    user_prompt = (
+        "Experiment details:\n"
+        f"- Name: {request.name}\n"
+        f"- Description: {request.description}\n"
+        f"- Method type: {request.methodType}\n"
+        f"- Goal: {request.goal}\n"
+        f"- Outcome: {request.outcome}\n"
+        f"- Number of variants: {request.numVariants}\n"
+        f"- Prior type: {request.prior_type}\n"
+        f"- Reward type: {request.reward_type}\n"
+    )
+
+    try:
+        # Use the SDK's async client: the synchronous call would block the
+        # event loop for the whole Gemini round trip.
+        response = await client.aio.models.generate_content(
+            model="gemini-2.0-flash",
+            contents=[user_prompt],
+            config=types.GenerateContentConfig(
+                system_instruction=system_instruction,
+                max_output_tokens=500,
+                temperature=0.7,
+                response_mime_type="application/json",
+            ),
+        )
+        # ``text`` can be empty/None when the model returns no text parts.
+        if not response.text:
+            raise ValueError("empty response from model")
+        contexts = json.loads(response.text)
+        if not isinstance(contexts, list):
+            raise ValueError("expected a JSON array of contexts")
+        return contexts
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Gemini API error: {e}") from e
+
+
+@router.post("/suggestCMAB-arms")
+async def suggest_cmab_arms(
+    request: CMABArmsSuggestionRequest,
+) -> list[dict[str, Any]]:
+    """Suggest arms for Contextual Multi-Armed Bandit experiment.
+
+    Builds a prompt from the experiment details and user contexts in
+    ``request`` and asks Gemini for a JSON array of arms. The prompt
+    requests the keys name, description, mu_init and sigma_init; the reply's
+    keys are not validated.
+
+    Args:
+        request: Experiment details and contexts interpolated into the prompt.
+
+    Returns:
+        The JSON array parsed from the model reply.
+
+    Raises:
+        HTTPException: 500 if the Gemini call fails, the reply is empty, or
+            the reply is not a JSON array.
+    """
+    system_instruction = (
+        "You are an assistant for a tool that helps social sector "
+        "organizations run digital experiments.\n"
+        "Given the experiment details and user contexts below, suggest "
+        "concise names and descriptions for each arm (variant) of a "
+        "Contextual Bandit (CMAB) experiment.\n"
+        "The arms should be distinct variations of a feature that is being "
+        "tested.\n"
+        "For each arm, also suggest reasonable initial values for mu_init "
+        "(mean prior) and sigma_init (standard deviation prior).\n"
+        "Respond ONLY with a valid JSON array. Each array element should be "
+        "an object with keys: name, description, mu_init, sigma_init. "
+        "No explanation, no markdown."
+    )
+
+    # One bullet line per user context.
+    contexts_str = "\n".join(
+        f"- {c.name} ({c.value_type}): {c.description}" for c in request.contexts
+    )
+
+    user_prompt = (
+        "Experiment details:\n"
+        f"- Name: {request.name}\n"
+        f"- Description: {request.description}\n"
+        f"- Method type: {request.methodType}\n"
+        f"- Goal: {request.goal}\n"
+        f"- Outcome: {request.outcome}\n"
+        f"- Number of variants: {request.numVariants}\n"
+        f"- Prior type: {request.prior_type}\n"
+        f"- Reward type: {request.reward_type}\n"
+        f"User Contexts:\n{contexts_str}\n"
+    )
+
+    try:
+        # Use the SDK's async client: the synchronous call would block the
+        # event loop for the whole Gemini round trip.
+        response = await client.aio.models.generate_content(
+            model="gemini-2.0-flash",
+            contents=[user_prompt],
+            config=types.GenerateContentConfig(
+                system_instruction=system_instruction,
+                max_output_tokens=500,
+                temperature=0.7,
+                response_mime_type="application/json",
+            ),
+        )
+        # ``text`` can be empty/None when the model returns no text parts.
+        if not response.text:
+            raise ValueError("empty response from model")
+        arms = json.loads(response.text)
+        if not isinstance(arms, list):
+            raise ValueError("expected a JSON array of arms")
+        return arms
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Gemini API error: {e}") from e
+
+
+async def generate_experiment_fields_logic(
+    data: ExperimentAIGenerateRequest,
+) -> ExperimentAIGenerateResponse:
+    """Generate experiment name, description, and type based on user inputs.
+
+    Sends the goal, outcome and number of variants to Gemini together with
+    documentation excerpts on the three experiment types, and parses the
+    JSON reply into an ``ExperimentAIGenerateResponse``.
+
+    Args:
+        data: Goal, outcome and number of variants supplied by the user.
+
+    Returns:
+        The generated name, description and experiment type.
+
+    Raises:
+        RuntimeError: If the Gemini call fails, the reply is empty, or the
+            reply cannot be parsed into the response model.
+    """
+    # Prompt construction cannot fail, so it stays outside the ``try``.
+    system_instruction = (
+        "You are an assistant for a tool that helps social sector "
+        "organizations run digital experiments.\n"
+        "Below are documentation excerpts about different experiment "
+        "types. Use this context to choose the most appropriate "
+        "experiment type and generate relevant names and descriptions.\n"
+        "-----\n"
+        "# Bayesian A/B Testing\n"
+        "Bayesian A/B testing compares two variants: treatment (e.g. a "
+        "new feature) and control (e.g. an existing feature). This is a "
+        "useful experiment when you need intuitive probability statements "
+        "about which arm is better for making downstream decisions, and "
+        "have the resources to balance how your arms are allocated to "
+        "your experimental cohort. Choose this over the bandit algorithms "
+        "when you're trying to make a 'permanent' decision about which "
+        "variant is better, as opposed to trying to dynamically pick the "
+        "best performing variant as data comes in.\n"
+        "With A/B testing, you have 2 variants of a feature / "
+        "implementation (one is ideally a baseline / existing feature "
+        "that you want to compare the other, a new feature, against). "
+        "You present users with one of the variants at a random but with "
+        "a fixed probability throughout the experiment and observe the "
+        "outcome of their interaction with it. Unlike frequentist A/B "
+        "testing, this method lets you set prior probabilities for the "
+        "treatment and control arms, similarly to the bandit experiments. "
+        "However, unlike the bandit methods, the posterior is computed at "
+        "the end of the experiment, and not with every observed outcome.\n"
+        "-----\n"
+        "# Contextual Bandits (CMABs)\n"
+        "Contextual bandits (CMABs), similarly to multi-armed bandits "
+        "(MABs), are useful for running experiments where you have "
+        "multiple variants of a feature / implementation that you want to "
+        "test. However, the key difference is that contextual bandits "
+        "take information about the end-user (e.g. gender, age, "
+        "engagement history) into account while converging to the "
+        "best-performing variant. The crucial difference is that we take "
+        "user information into account while updating these probabilities "
+        "for contextual bandits. Thus, rather than having a single "
+        "best-performing variant at the end of an experiment, you instead "
+        "have the best-performing variant that depends on the user "
+        "context.\n"
+        "-----\n"
+        "# Multi-Armed Bandits (MABs)\n"
+        "Multi-armed Bandits (MABs) are useful for running experiments "
+        "where you have multiple variants of a feature / implementation "
+        "that you want to test, and want to automatically converge to "
+        "the variant that produces the best results. MABs are a "
+        "specialized reinforcement learning algorithm: let's imagine that "
+        "you have set up N variants of an experiment, and for each "
+        "variant you have some prior probability of a desired result. "
+        "You serve each of your users one of these variants (the strategy "
+        "for choosing the variant is based on the prior probabilities), "
+        "and observe the result of their interaction with it. Once you "
+        "have observed the result, the algorithm updates your arm / "
+        "variant's probability of achieving the desired result. The next "
+        "time you serve a user one of the variants, the experiments "
+        "engine uses these updated probabilities to determine which "
+        "variant to show them. Since we update the probabilities for the "
+        "variants with every result observation, at any given time you "
+        "can observe the updated probability of success for every arm. "
+        "The best-performing variant at the end of the experiment is the "
+        "one with the highest probability.\n"
+        "-----\n"
+        "Given a goal, outcome, and number of variants, generate:\n"
+        "1. A concise and descriptive experiment name (max 8 words).\n"
+        "2. A detailed description of the experiment.\n"
+        "3. The most appropriate experiment type: 'mab' (multi-armed "
+        "bandit), 'bayes_ab' (Bayesian A/B test), or 'cmab' "
+        "(contextual bandit).\n"
+        "Respond ONLY with a valid JSON object with keys: name, "
+        "description, experiment_type. No explanation, no markdown."
+    )
+    user_prompt = (
+        "User inputs:\n"
+        f"- Goal: {data.goal}\n"
+        f"- Outcome: {data.outcome}\n"
+        f"- Number of variants: {data.num_variants}\n"
+    )
+
+    try:
+        # Use the SDK's async client: the synchronous call would block the
+        # event loop for the whole Gemini round trip.
+        response = await client.aio.models.generate_content(
+            model="gemini-2.0-flash",
+            contents=[user_prompt],
+            config=types.GenerateContentConfig(
+                system_instruction=system_instruction,
+                max_output_tokens=500,
+                temperature=0.1,
+                response_mime_type="application/json",
+                response_schema=ExperimentAIGenerateResponse,
+            ),
+        )
+
+        # ``text`` can be empty/None when the model returns no text parts.
+        if not response.text:
+            raise ValueError("empty response from model")
+        response_data = json.loads(response.text)
+        return ExperimentAIGenerateResponse(**response_data)
+
+    except Exception as e:
+        raise RuntimeError(f"Gemini API error: {e}") from e
+
+
+@router.post("/generate-whole-experiment", response_model=CompleteExperimentResponse)
+async def generate_whole_experiment(
+    data: ExperimentAIGenerateRequest,
+) -> CompleteExperimentResponse:
+    """Generate a complete experiment configuration.
+
+    First generates the name, description and experiment type, then asks for
+    arms (and, for 'cmab', contexts first) using fixed default prior and
+    reward types. An experiment type other than 'bayes_ab', 'mab' or 'cmab'
+    yields an empty arm list and no contexts.
+
+    Raises:
+        HTTPException: 500 wrapping any error raised along the way.
+    """
+    try:
+        fields = await generate_experiment_fields_logic(data)
+        method = fields.experiment_type
+
+        # Values shared by every downstream suggestion request.
+        shared: dict[str, Any] = {
+            "name": fields.name,
+            "description": fields.description,
+            "methodType": method,
+            "goal": data.goal,
+            "outcome": data.outcome,
+            "numVariants": data.num_variants,
+        }
+
+        suggested_arms: list[dict[str, Any]] = []
+        suggested_contexts: Optional[list[dict[str, Any]]] = None
+
+        if method == "bayes_ab":
+            # Bayesian A/B test: binary reward by default.
+            suggested_arms = await suggest_arms(
+                baysABArmsSuggestionRequest(**shared, reward_type="binary")
+            )
+        elif method == "mab":
+            # Multi-armed bandit: beta prior and binary reward by default.
+            suggested_arms = await suggest_mab_arms(
+                MABArmsSuggestionRequest(
+                    **shared, prior_type="beta", reward_type="binary"
+                )
+            )
+        elif method == "cmab":
+            # Contextual bandit: contexts are generated first because the
+            # arm prompt lists them.
+            suggested_contexts = await suggest_cmab_contexts(
+                CMABContextSuggestionRequest(
+                    **shared, prior_type="normal", reward_type="binary"
+                )
+            )
+            if suggested_contexts is not None:
+                typed_contexts = [
+                    CMABArmContext(**item) for item in suggested_contexts
+                ]
+                suggested_arms = await suggest_cmab_arms(
+                    CMABArmsSuggestionRequest(
+                        **shared,
+                        prior_type="normal",
+                        reward_type="binary",
+                        contexts=typed_contexts,
+                    )
+                )
+
+        return CompleteExperimentResponse(
+            name=fields.name,
+            description=fields.description,
+            experiment_type=method,
+            arms=suggested_arms,
+            contexts=suggested_contexts,
+        )
+
+    except Exception as e:
+        raise HTTPException(
+            status_code=500, detail=f"Error generating complete experiment: {e}"
+        ) from e
diff --git a/backend/requirements.txt b/backend/requirements.txt
index b762ddb..7c6fa22 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -15,3 +15,5 @@ sqlalchemy[asyncio]==2.0.20
uvicorn==0.23.2
boto3==1.37.25
pydantic[email]==2.11.3
+google==3.0.0
+google-genai==1.20.0
diff --git a/frontend/src/app/(protected)/experiments/add/ai-wizard/page.tsx b/frontend/src/app/(protected)/experiments/add/ai-wizard/page.tsx
new file mode 100644
index 0000000..cbf84b2
--- /dev/null
+++ b/frontend/src/app/(protected)/experiments/add/ai-wizard/page.tsx
@@ -0,0 +1,296 @@
+"use client";
+import { useState, useEffect } from "react";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Textarea } from "@/components/ui/textarea";
+import { useRouter } from "next/navigation";
+import { useExperimentStore } from "../../store/useExperimentStore";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { HelpCircle, Sparkles, ArrowRight, Wand2, FlaskConical } from "lucide-react";
+import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip";
+import { Separator } from "@/components/ui/separator";
+import {
+ Breadcrumb,
+ BreadcrumbItem,
+ BreadcrumbLink,
+ BreadcrumbList,
+ BreadcrumbPage,
+ BreadcrumbSeparator,
+} from "@/components/ui/breadcrumb";
+
+export default function AIWizardPage() {
+ const router = useRouter();
+ const {
+ updateName,
+ updateDescription,
+ updateMethodType,
+ updateArms,
+ updateContexts,
+ aiWizardState,
+ updateAIGoal,
+ updateAIOutcome,
+ updateAINumVariants,
+ resetState
+ } = useExperimentStore();
+
+
+ useEffect(() => {
+ resetState();
+ }, []);
+
+ const [isGenerating, setIsGenerating] = useState(false);
+
+ const handleGenerate = async () => {
+ setIsGenerating(true);
+ try {
+ const response = await fetch('http://localhost:8000/ai_helpers/generate-whole-experiment', {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({
+ goal: aiWizardState.goal,
+ outcome: aiWizardState.outcome,
+ num_variants: aiWizardState.numVariants,
+ }),
+ });
+
+ if (response.ok) {
+ const aiData = await response.json();
+
+ updateName(aiData.name);
+ updateDescription(aiData.description);
+ updateMethodType(aiData.experiment_type);
+
+ if (aiData.arms && Array.isArray(aiData.arms)) {
+ updateArms(aiData.arms);
+ }
+
+ if (aiData.contexts && Array.isArray(aiData.contexts)) {
+ updateContexts(aiData.contexts);
+ }
+
+ router.push("/experiments/add");
+ } else {
+ console.error("AI generation failed with status:", response.status);
+ }
+ } catch (error) {
+ console.error("AI generation failed:", error);
+ } finally {
+ setIsGenerating(false);
+ }
+ };
+
+ const isFormValid = aiWizardState.goal.trim() && aiWizardState.outcome.trim() && aiWizardState.numVariants >= 2;
+
+ return (
+
+
+ Answer a few questions and we'll design your experiment
+
+
+
+
+
+
+
+
+
+ Experiment Details
+
+
+ Answer these questions to help AI design your experiment
+
+
+
+
+
+
+
+
+
+
+
+
+ Examples: Increase sign-ups by testing a new onboarding flow; reduce drop-off by changing message timing; improve engagement by using a casual tone for youth users.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Examples: Completion rate (% who finish), click rate (% who click), time spent, engagement score, drop-off rate. What will you do if the outcome improves?
+
+
+
+
+ updateAIOutcome(e.target.value)}
+ placeholder="E.g., Completion rate, engagement score, time spent, drop-off rate, clicks. If completion rate improves, roll out new onboarding to all users."
+ className="text-base"
+ />
+
+ This should be something you can measure on your platform.
+
+
+
+ Required field
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Variants are the different versions you want to compare. For example: Variant 1 - current button, Variant 2 - new red button, Variant 3 - new blue button.
+
+
+
+
+ updateAINumVariants(Number(e.target.value))}
+ className="w-24 text-base text-center"
+ placeholder="E.g., 2 for A/B test, 3 for multivariate"
+ />
+
+ Usually 2-4 variants work best. More variants need more users to get reliable results.
+
+ Please review all the details below before creating your experiment.
+
+
+
+ {!isFormValid && (
+
+
+
+ Some required information is missing. Please go back to previous steps to complete the setup.
+
+
+ )}
+
+
+
+
+
+
+ Basic Information
+
+
+
+