AI Image Generation API: Build Your Own Midjourney

Why Build Your Own?

Midjourney charges $10-120/month with generation limits. Running your own image generation API lets you:

Unlimited generations: Pay only for GPU time
Full control: Use any model, any style, any size
Custom models: Train on your own data
No content filters: Generate what you need
White-label: Offer as your own product

💡 What you'll build

A production-ready image generation API supporting SDXL, Flux, and custom models with queue management, webhooks, and auto-scaling.

Architecture Overview

A production image generation system has these components:

API Server: Receives requests, returns job IDs
Queue: Redis for job management
GPU Workers: Process generation requests
Storage: S3/R2 for generated images
CDN: Serve images globally

Setup

Spin up a GPU instance on GPUBrazil:

RTX 4090 (24GB): $0.40/hr — Runs SDXL, most models
L40S (48GB): $0.79/hr — Multiple models simultaneously

# Install dependencies
pip install torch diffusers transformers accelerate
# xformers backs pipe.enable_xformers_memory_efficient_attention(), which the
# examples below call; without it that call fails at startup.
pip install xformers
pip install fastapi uvicorn redis rq
# requests is used by the worker (webhooks) and by the example client
pip install boto3 pillow requests

# For Flux models
pip install sentencepiece protobuf

Basic Image Generation

import torch
from diffusers import StableDiffusionXLPipeline, DPMSolverMultistepScheduler

# Load the SDXL base checkpoint (fp16 safetensors weights) onto the GPU
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    use_safetensors=True,
    variant="fp16",
    torch_dtype=torch.float16,
)
pipe.to("cuda")

# Speed tweaks: swap in the DPM-Solver multistep scheduler (reusing the
# existing scheduler's config) and enable xformers attention
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_xformers_memory_efficient_attention()

# Run one 1024x1024 generation and keep the first (only) image
result = pipe(
    prompt="A majestic lion in a cyberpunk city, neon lights, rain, 8k, detailed",
    negative_prompt="blurry, low quality, distorted",
    width=1024,
    height=1024,
    num_inference_steps=25,
    guidance_scale=7.5,
)
image = result.images[0]

# Write the result to disk
image.save("output.png")

Production API Server

# api.py
import json
import uuid
from datetime import datetime
from typing import Optional

import boto3
from fastapi import FastAPI, HTTPException, BackgroundTasks
from pydantic import BaseModel, Field
from redis import Redis
from rq import Queue

# Where finished images live and the public base URL they are served from
BUCKET = "your-image-bucket"
CDN_URL = "https://cdn.yourdomain.com"

# HTTP application
app = FastAPI(title="Image Generation API")

# Redis connection and the RQ queue ("image_gen") that workers consume
redis_conn = Redis(host="localhost", port=6379)
queue = Queue("image_gen", connection=redis_conn)

# S3 client for image storage
s3 = boto3.client("s3")

class GenerateRequest(BaseModel):
    """Request body for ``POST /generate``.

    The validated fields are forwarded unchanged to the worker, which maps
    them onto the diffusion pipeline call.
    """

    prompt: str = Field(..., min_length=1, max_length=2000)
    negative_prompt: str = ""
    # Key into the worker's model registry; unknown names fail in the worker.
    model: str = "sdxl"
    # multiple_of=8: the SDXL pipeline rejects sizes that are not divisible
    # by 8, so refuse them here instead of failing later inside the worker.
    width: int = Field(default=1024, ge=512, le=2048, multiple_of=8)
    height: int = Field(default=1024, ge=512, le=2048, multiple_of=8)
    steps: int = Field(default=25, ge=10, le=50)
    guidance_scale: float = Field(default=7.5, ge=1, le=20)
    # Optional[...] rather than a bare ``int``/``str`` with a None default:
    # Pydantic v2 no longer treats ``x: int = None`` as nullable, so a client
    # sending an explicit ``null`` would otherwise get a 422.
    seed: Optional[int] = None
    # The worker POSTs the job outcome to this URL when it is set.
    webhook_url: Optional[str] = None

class JobResponse(BaseModel):
    # Response body of POST /generate (that route's response_model).
    job_id: str  # identifier to poll with GET /job/{job_id}
    status: str  # "queued" when returned by POST /generate
    estimated_time: int  # rough wait estimate in seconds, derived from queue length

class JobResult(BaseModel):
    """Response body for ``GET /job/{job_id}``.

    ``status`` is one of "queued", "processing", "completed" or "failed".
    ``image_url``, ``error`` and ``completed_at`` are only known once the job
    has finished, and the endpoint passes ``None`` for them until then. They
    must therefore be declared Optional: under Pydantic v2 a plain
    ``str = None`` field rejects an explicit ``None``, which would turn every
    status check of an unfinished job into a server error.
    """

    job_id: str
    status: str
    image_url: Optional[str] = None
    error: Optional[str] = None
    created_at: str
    completed_at: Optional[str] = None

# Store job metadata
#
# The API server and the RQ worker are separate processes, so a plain
# module-level dict cannot be shared between them: each process that imports
# this module would get its own empty copy and the API would never see the
# worker's status updates. Job metadata is therefore kept in Redis behind a
# small mapping facade that supports the dict operations used on ``jobs_db``
# (``in``, ``[]``, ``[] =`` and, on a job, ``[]``, ``[] =``, ``.get`` and
# ``.update``).
JOB_KEY_PREFIX = "image_gen:job:"
# Metadata expires after a day so finished jobs do not accumulate forever.
JOB_TTL_SECONDS = 24 * 60 * 60


class _JobRecord:
    """Dict-like live view of one job's metadata, stored as a Redis hash.

    Each field is stored JSON-encoded, so nested values (such as the request
    dict) and ``None`` round-trip unchanged. Reads and writes go straight to
    Redis; nothing is cached on the instance.
    """

    def __init__(self, conn, key):
        self._conn = conn
        self._key = key

    def __getitem__(self, field):
        raw = self._conn.hget(self._key, field)
        if raw is None:
            raise KeyError(field)
        return json.loads(raw)

    def get(self, field, default=None):
        raw = self._conn.hget(self._key, field)
        return default if raw is None else json.loads(raw)

    def __setitem__(self, field, value):
        self._conn.hset(self._key, field, json.dumps(value))

    def update(self, fields):
        # HSET with an empty mapping is an error, so skip no-op updates.
        if fields:
            encoded = {name: json.dumps(value) for name, value in fields.items()}
            self._conn.hset(self._key, mapping=encoded)


class _RedisJobStore:
    """Mapping of job id -> :class:`_JobRecord`, backed by Redis."""

    def __init__(self, conn, prefix=JOB_KEY_PREFIX, ttl=JOB_TTL_SECONDS):
        self._conn = conn
        self._prefix = prefix
        self._ttl = ttl

    def _key(self, job_id):
        return f"{self._prefix}{job_id}"

    def __contains__(self, job_id):
        return bool(self._conn.exists(self._key(job_id)))

    def __getitem__(self, job_id):
        if job_id not in self:
            raise KeyError(job_id)
        return _JobRecord(self._conn, self._key(job_id))

    def __setitem__(self, job_id, fields):
        """Create (or replace) the job's metadata; *fields* must be non-empty."""
        key = self._key(job_id)
        encoded = {name: json.dumps(value) for name, value in fields.items()}
        # One transaction so a reader never observes a half-written job.
        pipe = self._conn.pipeline()
        pipe.delete(key)
        pipe.hset(key, mapping=encoded)
        pipe.expire(key, self._ttl)
        pipe.execute()


jobs_db = _RedisJobStore(redis_conn)

@app.post("/generate", response_model=JobResponse)
async def generate_image(request: GenerateRequest):
    """Register a new generation job, enqueue it, and return its handle.

    Returns a ``JobResponse`` carrying the new job id, the initial "queued"
    status and a rough wait estimate in seconds.
    """
    job_id = str(uuid.uuid4())

    # Record the job before it is enqueued
    jobs_db[job_id] = dict(
        status="queued",
        request=request.dict(),
        created_at=datetime.utcnow().isoformat(),
    )

    # Hand the work to the RQ queue; the target is referenced by dotted path
    queue.enqueue("worker.generate_image", job_id, request.dict(), job_timeout=300)

    # Wait estimate: ~15 s per job currently in the queue, plus a fixed 10 s.
    # The length is read after enqueueing.
    pending = len(queue)
    return JobResponse(
        job_id=job_id,
        status="queued",
        estimated_time=pending * 15 + 10,
    )

@app.get("/job/{job_id}", response_model=JobResult)
async def get_job(job_id: str):
    """Return the stored status (and result, if any) of one job.

    Raises a 404 ``HTTPException`` when the job id is unknown.
    """
    try:
        job = jobs_db[job_id]
    except KeyError:
        raise HTTPException(status_code=404, detail="Job not found") from None

    # These fields only exist once the job has finished; absent ones are None
    finished_fields = {
        name: job.get(name) for name in ("image_url", "error", "completed_at")
    }
    return JobResult(
        job_id=job_id,
        status=job["status"],
        created_at=job["created_at"],
        **finished_fields,
    )

@app.get("/health")
async def health():
    """Report liveness plus the current queue length and worker count."""
    # Local import: only this endpoint needs the Worker class.
    from rq import Worker

    return {
        "status": "healthy",
        "queue_length": len(queue),
        # ``queue.count`` is the number of queued jobs, not workers; ask RQ
        # for the workers registered on this queue instead.
        "workers": Worker.count(queue=queue),
    }

GPU Worker

# worker.py
import torch
from diffusers import (
    StableDiffusionXLPipeline,
    FluxPipeline,
    DPMSolverMultistepScheduler
)
import boto3
import requests
from PIL import Image
import io
from datetime import datetime

# Destination bucket and the public base URL used to build image links
BUCKET = "your-image-bucket"
CDN_URL = "https://cdn.yourdomain.com"

# S3 client used to upload finished images
s3 = boto3.client("s3")

# Registry of loaded pipelines, keyed by model name; filled at worker startup
models = dict()

def load_models():
    """Load the supported pipelines onto the GPU and register them in ``models``.

    Called once when this module is imported. Each pipeline is configured the
    same way as in the basic example (safetensors weights, DPM-Solver
    multistep scheduler, xformers attention), so the API's default of 25
    steps produces the same results as the standalone script.
    """
    # No ``global`` needed: the registry is mutated in place, never rebound.

    # SDXL
    sdxl = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
        variant="fp16",
        use_safetensors=True,
    ).to("cuda")
    sdxl.scheduler = DPMSolverMultistepScheduler.from_config(sdxl.scheduler.config)
    sdxl.enable_xformers_memory_efficient_attention()
    # Register only once the pipeline is fully configured.
    models['sdxl'] = sdxl

    # Flux (optional, needs more VRAM)
    # models['flux'] = FluxPipeline.from_pretrained(
    #     "black-forest-labs/FLUX.1-schnell",
    #     torch_dtype=torch.bfloat16,
    # ).to("cuda")

load_models()

def _notify_webhook(url, payload):
    """Best-effort POST of *payload* (as JSON) to *url*; never raises.

    A webhook is only a notification: an unreachable or slow receiver must
    not change the outcome of the job, so delivery errors are logged and
    swallowed, and the request is bounded by a timeout.
    """
    # Local import keeps this helper self-contained.
    import logging

    if not url:
        return
    # NOTE(review): the URL is supplied by the API client; restrict outbound
    # destinations if callers are not trusted.
    try:
        requests.post(url, json=payload, timeout=10).raise_for_status()
    except requests.RequestException:
        logging.getLogger(__name__).warning(
            "Webhook delivery to %s failed", url, exc_info=True
        )


def generate_image(job_id: str, params: dict):
    """Worker function to generate image

    Runs the requested pipeline, uploads the PNG to S3, records the outcome
    in ``jobs_db`` and notifies the optional webhook.

    Args:
        job_id: Id of the job created by the API.
        params: The validated request fields (prompt, model, width, ...).

    Returns:
        The public URL of the uploaded image.

    Raises:
        ValueError: If ``params["model"]`` is not a loaded model.
        Exception: Any generation or upload error is recorded on the job
            and then re-raised so RQ marks the job as failed.
    """
    # NOTE(review): this only works if api.jobs_db is backed by storage that
    # both the API process and this worker process can reach.
    from api import jobs_db  # Import shared state

    webhook_url = params.get("webhook_url")

    try:
        jobs_db[job_id]["status"] = "processing"

        # Select model
        model_name = params.get("model", "sdxl")
        pipe = models.get(model_name)
        if pipe is None:
            raise ValueError(f"Unknown model: {model_name}")

        # Set seed for reproducibility. Compare against None explicitly:
        # 0 is a valid seed and must not be treated as "no seed".
        seed = params.get("seed")
        generator = None
        if seed is not None:
            generator = torch.Generator("cuda").manual_seed(seed)

        # Generate
        image = pipe(
            prompt=params["prompt"],
            negative_prompt=params.get("negative_prompt", ""),
            width=params.get("width", 1024),
            height=params.get("height", 1024),
            num_inference_steps=params.get("steps", 25),
            guidance_scale=params.get("guidance_scale", 7.5),
            generator=generator,
        ).images[0]

        # Upload to S3
        buffer = io.BytesIO()
        image.save(buffer, format="PNG", optimize=True)
        buffer.seek(0)

        s3_key = f"generated/{job_id}.png"
        s3.upload_fileobj(
            buffer,
            BUCKET,
            s3_key,
            ExtraArgs={'ContentType': 'image/png'}
        )

        image_url = f"{CDN_URL}/{s3_key}"

        # Update job status
        jobs_db[job_id].update({
            "status": "completed",
            "image_url": image_url,
            "completed_at": datetime.utcnow().isoformat()
        })

    except Exception as e:
        jobs_db[job_id].update({
            "status": "failed",
            "error": str(e),
            "completed_at": datetime.utcnow().isoformat()
        })

        _notify_webhook(webhook_url, {
            "job_id": job_id,
            "status": "failed",
            "error": str(e)
        })

        raise

    # Notify outside the try block: a webhook problem must not flip a job
    # whose image was generated and uploaded successfully to "failed".
    _notify_webhook(webhook_url, {
        "job_id": job_id,
        "status": "completed",
        "image_url": image_url
    })

    return image_url

Running the System

# Terminal 1: Redis
redis-server

# Terminal 2: API Server
uvicorn api:app --host 0.0.0.0 --port 8000

# Terminal 3: Worker(s)
# SimpleWorker runs each job inside the worker process instead of forking a
# child per job. worker.py is then imported once, so load_models() runs once
# and the pipelines stay on the GPU between jobs rather than being reloaded
# for every request.
rq worker image_gen --with-scheduler --worker-class rq.worker.SimpleWorker

Client Usage

import requests
import time

API_URL = "http://your-server:8000"
REQUEST_TIMEOUT = 30   # seconds allowed for each HTTP call
POLL_INTERVAL = 2      # seconds between status checks
MAX_WAIT = 600         # stop polling after this many seconds

# Submit generation request
response = requests.post(
    f"{API_URL}/generate",
    json={
        "prompt": "A serene Japanese garden with cherry blossoms, koi pond, traditional bridge, soft morning light",
        "negative_prompt": "ugly, blurry, low quality",
        "width": 1024,
        "height": 1024,
        "steps": 30
    },
    timeout=REQUEST_TIMEOUT,
)
# Fail loudly on validation errors (422) or server errors instead of hitting
# a KeyError on the missing 'job_id' below.
response.raise_for_status()

job = response.json()
print(f"Job ID: {job['job_id']}")
print(f"Estimated time: {job['estimated_time']}s")

# Poll for result, but give up eventually: if no worker is running the job
# stays "queued" forever and an unbounded loop would never return.
deadline = time.monotonic() + MAX_WAIT
while time.monotonic() < deadline:
    status_response = requests.get(
        f"{API_URL}/job/{job['job_id']}", timeout=REQUEST_TIMEOUT
    )
    status_response.raise_for_status()
    result = status_response.json()

    if result['status'] == 'completed':
        print(f"Image URL: {result['image_url']}")
        break
    elif result['status'] == 'failed':
        print(f"Error: {result['error']}")
        break

    time.sleep(POLL_INTERVAL)
else:
    # Reached only when the deadline passes without a break above.
    print(f"Timed out after {MAX_WAIT}s waiting for job {job['job_id']}")

Adding More Models

Flux for Photorealism

from diffusers import FluxPipeline

# Flux Schnell (fast)
flux = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16
).to("cuda")

# Generate (Flux uses different parameters)
image = flux(
    prompt="Portrait of a woman with freckles, natural lighting, 35mm film",
    num_inference_steps=4,  # Schnell is fast!
    guidance_scale=0.0,  # Schnell doesn't use guidance
    # Schnell is documented with a prompt length limit of 256 tokens; the
    # pipeline default is larger, so set it explicitly.
    max_sequence_length=256,
    width=1024,
    height=1024,
).images[0]

Custom LoRA Models

# Load base + LoRA
# Starts from the same SDXL base checkpoint in fp16 on the GPU; relies on the
# torch / StableDiffusionXLPipeline imports from the earlier examples.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
).to("cuda")

# Load LoRA weights
# Option 1: a local .safetensors file
pipe.load_lora_weights("path/to/your-lora.safetensors")

# Or from HuggingFace
# Option 2: a Hub repository id. The two calls are shown as alternatives;
# presumably running both loads both LoRAs — pick one (TODO confirm).
pipe.load_lora_weights("username/lora-model-name")

Performance Optimization

Torch Compile

# PyTorch 2.0+ compilation for faster inference
# Replaces the pipeline's UNet and the VAE's decode method with compiled
# versions; `pipe` is a pipeline loaded as in the earlier examples.
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
pipe.vae.decode = torch.compile(pipe.vae.decode, mode="reduce-overhead", fullgraph=True)

# First generation is slow (compilation), subsequent are 20-40% faster

Batch Processing

# Generate multiple images at once
# One pipeline call with four prompts and four negative prompts, given as
# lists of equal length; `.images` holds the results as a list.
images = pipe(
    prompt=["prompt 1", "prompt 2", "prompt 3", "prompt 4"],
    negative_prompt=["neg 1", "neg 2", "neg 3", "neg 4"],
    num_images_per_prompt=1,
).images

# More efficient than 4 separate calls

⚠️ Batch Size vs VRAM

Batching increases VRAM usage. On a 24GB GPU, a batch size of 4 for 1024x1024 images is typically the maximum.

Cost Analysis

Running your own vs Midjourney:

Service	Cost	Images/Month	Cost/Image
Midjourney Basic	$10/mo	200	$0.05
Midjourney Pro	$60/mo	Unlimited*	~$0.01
GPUBrazil RTX 4090	$0.40/hr	~240/hr	$0.0017

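At scale, running your own is up to ~30x cheaper per image than Midjourney Basic ($0.0017 vs. $0.05), and roughly 6x cheaper than the ~$0.01 per-image estimate for Midjourney Pro!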

Build Your Image Generation Empire

Generate thousands of images for pennies on GPUBrazil.

Get $5 Free Credit →

Production Checklist

✅ Redis persistence for queue durability
✅ Multiple workers for parallel processing
✅ S3/R2 for image storage with CDN
✅ Rate limiting per API key
✅ Webhook support for async notifications
✅ NSFW filtering (if needed)
✅ Prometheus metrics for monitoring
✅ Auto-scaling based on queue depth

Conclusion

Building your own Midjourney alternative is surprisingly accessible. With SDXL and Flux models plus GPUBrazil's affordable GPUs, you can generate high-quality images at a fraction of the cost of commercial services.

Start with the basic setup, add features as needed, and scale horizontally by adding more GPU workers. The economics get better at scale—at 10,000 images/month, roughly $17 of GPU time replaces about $500 at Midjourney Basic's per-image rate, and the gap keeps widening as volume grows.