Why Build Your Own?

Midjourney charges $10-120/month with generation limits. Running your own image generation API lets you:

💡 What you'll build

A production-ready image generation API supporting SDXL, Flux, and custom models with queue management, webhooks, and auto-scaling.

Architecture Overview

A production image generation system has these components:

  1. API Server: Receives requests, returns job IDs
  2. Queue: Redis for job management
  3. GPU Workers: Process generation requests
  4. Storage: S3/R2 for generated images
  5. CDN: Serve images globally

Setup

Spin up a GPU instance on GPUBrazil:

# Install dependencies
# NOTE(review): xformers is not in this list, but the Python examples in this
# guide call pipe.enable_xformers_memory_efficient_attention() — confirm
# whether it should be installed here as well.
pip install torch diffusers transformers accelerate
pip install fastapi uvicorn redis rq
pip install boto3 pillow

# For Flux models
pip install sentencepiece protobuf

Basic Image Generation

import torch
from diffusers import StableDiffusionXLPipeline, DPMSolverMultistepScheduler

# Load SDXL in half precision (fp16 weights, safetensors format)
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)
pipe.to("cuda")

# Optimize for speed
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
try:
    pipe.enable_xformers_memory_efficient_attention()
except ImportError:
    # xformers is an optional extra that the setup step does not install.
    # Without it the pipeline keeps its default attention implementation,
    # so generation still works; skipping is deliberate, not an error.
    pass

# Generate image
image = pipe(
    prompt="A majestic lion in a cyberpunk city, neon lights, rain, 8k, detailed",
    negative_prompt="blurry, low quality, distorted",
    num_inference_steps=25,
    guidance_scale=7.5,
    width=1024,
    height=1024,
).images[0]

image.save("output.png")

Production API Server

# api.py
import uuid
from datetime import datetime
from typing import Optional

import boto3
from fastapi import FastAPI, HTTPException, BackgroundTasks
from pydantic import BaseModel, Field
from redis import Redis
from rq import Queue

# FastAPI application object that the route decorators below attach to.
app = FastAPI(title="Image Generation API")
# Redis connection to a server on localhost:6379, used by the job queue.
redis_conn = Redis(host='localhost', port=6379)
# RQ queue named 'image_gen'; workers must listen on the same queue name.
queue = Queue('image_gen', connection=redis_conn)

# S3 for image storage
# NOTE(review): s3, BUCKET and CDN_URL are not referenced by the endpoints
# shown in this listing — confirm whether the API process needs them.
s3 = boto3.client('s3')
BUCKET = 'your-image-bucket'  # placeholder bucket name
CDN_URL = 'https://cdn.yourdomain.com'  # placeholder CDN base URL

class GenerateRequest(BaseModel):
    """Request body for ``POST /generate``.

    All fields except ``prompt`` are optional and fall back to the defaults
    below. ``seed`` and ``webhook_url`` may be omitted or sent as ``null``.
    """

    prompt: str = Field(..., min_length=1, max_length=2000)
    negative_prompt: str = ""
    model: str = "sdxl"
    # The SDXL pipeline rejects sizes that are not divisible by 8, so validate
    # here and return a 422 instead of failing later inside the worker.
    width: int = Field(default=1024, ge=512, le=2048, multiple_of=8)
    height: int = Field(default=1024, ge=512, le=2048, multiple_of=8)
    steps: int = Field(default=25, ge=10, le=50)
    guidance_scale: float = Field(default=7.5, ge=1, le=20)
    # Explicitly Optional: a bare ``int = None`` does not accept ``null`` under
    # Pydantic v2.
    seed: Optional[int] = None
    # SECURITY: this URL is supplied by the caller and is later requested by
    # the worker. Restrict it (scheme/host allow-list) before exposing the API
    # to untrusted clients.
    webhook_url: Optional[str] = None

class JobResponse(BaseModel):
    """Response returned by ``POST /generate`` once a job has been queued."""
    job_id: str  # UUID string generated by the API for this job
    status: str  # "queued" at submission time
    estimated_time: int  # rough wait in seconds: queue length * 15 + 10

class JobResult(BaseModel):
    """Response returned by ``GET /job/{job_id}``.

    ``image_url``, ``error`` and ``completed_at`` are ``None`` until the job
    reaches a state where they apply. They are declared ``Optional`` because
    the endpoint passes ``None`` for them explicitly, which a bare
    ``str = None`` annotation rejects under Pydantic v2.
    """

    job_id: str
    status: str
    image_url: Optional[str] = None
    error: Optional[str] = None
    created_at: str  # ISO-8601 timestamp string
    completed_at: Optional[str] = None  # ISO-8601 timestamp string

# Store job metadata
# Plain in-memory dict keyed by job_id. It lives only inside this API process:
# entries are lost on restart and are not visible to any other process.
jobs_db = {}

@app.post("/generate", response_model=JobResponse)
async def generate_image(request: GenerateRequest):
    """Submit image generation job

    Records the job locally, enqueues it on the RQ queue and returns the job
    id together with a rough wait estimate.

    Raises:
        HTTPException: 503 if the job could not be enqueued because Redis is
            unavailable.
    """
    from redis.exceptions import RedisError

    job_id = str(uuid.uuid4())
    params = request.dict()

    # Store job info
    jobs_db[job_id] = {
        "status": "queued",
        "request": params,
        "created_at": datetime.utcnow().isoformat(),
    }

    # Queue the job. The RQ job is given the same id as the API job so the
    # two can be correlated (RQ would otherwise assign its own random id).
    try:
        queue.enqueue(
            'worker.generate_image',
            job_id,
            params,
            job_timeout=300,
            job_id=job_id,
        )
        # Estimate wait time based on queue length
        queue_length = len(queue)
    except RedisError as exc:
        # Don't leave a record that claims "queued" for a job that never
        # reached the queue.
        jobs_db.pop(job_id, None)
        raise HTTPException(
            status_code=503, detail="Job queue unavailable"
        ) from exc

    estimated_time = queue_length * 15 + 10  # ~15 sec per image

    return JobResponse(
        job_id=job_id,
        status="queued",
        estimated_time=estimated_time
    )

@app.get("/job/{job_id}", response_model=JobResult)
async def get_job(job_id: str):
    """Get job status and result

    The local record in ``jobs_db`` is only ever written by this process, so
    on its own it never leaves the "queued" state. While the record is not
    terminal, the RQ job stored in Redis under the same id is consulted and
    its state is copied into the record. If no RQ job exists under that id,
    or Redis cannot be reached, the local record is returned unchanged.

    Raises:
        HTTPException: 404 if ``job_id`` is unknown.
    """
    from redis.exceptions import RedisError
    from rq.exceptions import NoSuchJobError
    from rq.job import Job

    job = jobs_db.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")

    terminal = ("completed", "failed")
    if job["status"] not in terminal:
        try:
            rq_job = Job.fetch(job_id, connection=redis_conn)
        except (NoSuchJobError, RedisError):
            rq_job = None

        if rq_job is not None:
            # get_status() returns a plain string or a str-based enum
            # depending on the RQ version; normalise to the string value.
            raw_status = rq_job.get_status()
            rq_status = getattr(raw_status, "value", raw_status)

            if rq_status == "started":
                job["status"] = "processing"
            elif rq_status == "finished":
                # The worker function's return value is the image URL.
                job.update(status="completed", image_url=rq_job.result)
            elif rq_status == "failed":
                # exc_info holds the full traceback; expose only its last
                # line so internals are not leaked to API clients.
                trace = (rq_job.exc_info or "").strip()
                job.update(
                    status="failed",
                    error=trace.splitlines()[-1] if trace else "Job failed",
                )

            # Cache the terminal state locally: RQ expires finished jobs, and
            # the result must stay readable after that.
            if job["status"] in terminal and not job.get("completed_at"):
                ended_at = rq_job.ended_at or datetime.utcnow()
                job["completed_at"] = ended_at.isoformat()

    return JobResult(
        job_id=job_id,
        status=job["status"],
        image_url=job.get("image_url"),
        error=job.get("error"),
        created_at=job["created_at"],
        completed_at=job.get("completed_at")
    )

@app.get("/health")
async def health():
    """Report service health, pending job count and active worker count."""
    from rq import Worker

    return {
        "status": "healthy",
        "queue_length": len(queue),
        # queue.count is the number of queued jobs (same as len(queue)), not
        # workers; ask RQ for the workers registered on this queue instead.
        "workers": Worker.count(queue=queue)
    }

GPU Worker

# worker.py
import torch
from diffusers import (
    StableDiffusionXLPipeline,
    FluxPipeline,
    DPMSolverMultistepScheduler
)
import boto3
import requests
from PIL import Image
import io
from datetime import datetime

# Initialize S3
# Client, bucket and CDN base URL used by generate_image() below to upload
# the PNG and build the public image URL.
s3 = boto3.client('s3')
BUCKET = 'your-image-bucket'  # placeholder bucket name
CDN_URL = 'https://cdn.yourdomain.com'  # placeholder CDN base URL

# Load models on worker startup
# Maps the API's ``model`` name to a ready-to-call pipeline.
models = {}


def load_models():
    """Load every supported pipeline onto the GPU and register it in ``models``.

    Called once at import time (below), so importing this module is slow and
    requires a CUDA device.
    """
    # SDXL
    sdxl = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
        variant="fp16",
        use_safetensors=True,
    ).to("cuda")
    try:
        sdxl.enable_xformers_memory_efficient_attention()
    except ImportError:
        # xformers is an optional extra that the setup step does not install.
        # Without it the pipeline keeps its default attention implementation,
        # so skipping is deliberate, not an error.
        pass
    models['sdxl'] = sdxl

    # Flux (optional, needs more VRAM)
    # models['flux'] = FluxPipeline.from_pretrained(
    #     "black-forest-labs/FLUX.1-schnell",
    #     torch_dtype=torch.bfloat16,
    # ).to("cuda")


# NOTE(review): RQ's default worker runs each job in a forked child; confirm
# that this import-time load is not repeated for every job in your setup.
load_models()

def _post_webhook(url: str, payload: dict) -> None:
    """Deliver a webhook notification on a best-effort basis; never raises."""
    import logging

    try:
        # A timeout keeps a slow or unreachable receiver from blocking the
        # GPU worker indefinitely.
        requests.post(url, json=payload, timeout=10).raise_for_status()
    except requests.RequestException as exc:
        logging.getLogger(__name__).warning(
            "Webhook delivery to %s failed: %s", url, exc
        )


def generate_image(job_id: str, params: dict):
    """Worker function to generate image

    Runs the requested pipeline, uploads the PNG to S3 and returns the public
    image URL. Job state is not written to the API's ``jobs_db``: this function
    runs in the worker process, where that dict is a separate, empty object.
    RQ itself records the return value (on success) or the exception (on
    failure) on the job.

    Args:
        job_id: Identifier of the job; also used as the S3 object name.
        params: Request parameters (``prompt``, ``negative_prompt``, ``model``,
            ``width``, ``height``, ``steps``, ``guidance_scale``, ``seed``,
            ``webhook_url``).

    Returns:
        The CDN URL of the generated image.

    Raises:
        ValueError: If ``params["model"]`` is not a loaded model.
        Exception: Any error from generation or upload is re-raised after the
            failure webhook (if configured) has been attempted.
    """
    # SECURITY: webhook_url comes from the API caller; validate it upstream
    # before letting the worker make requests to it.
    webhook_url = params.get("webhook_url")

    try:
        # Select model
        model_name = params.get("model", "sdxl")
        pipe = models.get(model_name)
        if pipe is None:
            raise ValueError(f"Unknown model: {model_name}")

        # Set seed for reproducibility. Compare against None so that an
        # explicit seed of 0 is honoured instead of being treated as "no seed".
        seed = params.get("seed")
        generator = None
        if seed is not None:
            generator = torch.Generator("cuda").manual_seed(seed)

        # Generate
        # NOTE(review): this keyword set matches the SDXL pipeline; other
        # pipelines added to `models` may need different arguments — confirm
        # before enabling them.
        image = pipe(
            prompt=params["prompt"],
            negative_prompt=params.get("negative_prompt", ""),
            width=params.get("width", 1024),
            height=params.get("height", 1024),
            num_inference_steps=params.get("steps", 25),
            guidance_scale=params.get("guidance_scale", 7.5),
            generator=generator,
        ).images[0]

        # Upload to S3
        buffer = io.BytesIO()
        image.save(buffer, format="PNG", optimize=True)
        buffer.seek(0)

        s3_key = f"generated/{job_id}.png"
        s3.upload_fileobj(
            buffer,
            BUCKET,
            s3_key,
            ExtraArgs={'ContentType': 'image/png'}
        )

        image_url = f"{CDN_URL}/{s3_key}"

    except Exception as e:
        if webhook_url:
            _post_webhook(webhook_url, {
                "job_id": job_id,
                "status": "failed",
                "error": str(e)
            })
        raise

    # Send webhook if configured. This happens outside the try block so that
    # a webhook problem cannot turn a successfully generated image into a
    # failed job.
    if webhook_url:
        _post_webhook(webhook_url, {
            "job_id": job_id,
            "status": "completed",
            "image_url": image_url
        })

    return image_url

Running the System

# Terminal 1: Redis
redis-server

# Terminal 2: API Server
# Serves the `app` object from api.py on all interfaces, port 8000.
uvicorn api:app --host 0.0.0.0 --port 8000

# Terminal 3: Worker(s)
# Listens on the 'image_gen' queue, the same name the API enqueues to.
# NOTE(review): jobs are enqueued as 'worker.generate_image', so presumably
# this must be started where worker.py is importable — confirm.
rq worker image_gen --with-scheduler

Client Usage

import requests
import time

API_URL = "http://your-server:8000"
REQUEST_TIMEOUT = 30   # seconds allowed for each HTTP call
POLL_INTERVAL = 2      # seconds between status checks
POLL_TIMEOUT = 300     # give up waiting for the job after this many seconds

# Submit generation request
response = requests.post(f"{API_URL}/generate", json={
    "prompt": "A serene Japanese garden with cherry blossoms, koi pond, traditional bridge, soft morning light",
    "negative_prompt": "ugly, blurry, low quality",
    "width": 1024,
    "height": 1024,
    "steps": 30
}, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx (e.g. a validation error) instead of hitting a
# KeyError on 'job_id' below.
response.raise_for_status()

job = response.json()
print(f"Job ID: {job['job_id']}")
print(f"Estimated time: {job['estimated_time']}s")

# Poll for result, bounded by a deadline so a stuck job cannot hang the
# client forever.
deadline = time.monotonic() + POLL_TIMEOUT
while time.monotonic() < deadline:
    poll = requests.get(f"{API_URL}/job/{job['job_id']}", timeout=REQUEST_TIMEOUT)
    poll.raise_for_status()
    result = poll.json()

    if result['status'] == 'completed':
        print(f"Image URL: {result['image_url']}")
        break
    elif result['status'] == 'failed':
        print(f"Error: {result['error']}")
        break

    time.sleep(POLL_INTERVAL)
else:
    # Reached only when the deadline passes without a terminal status.
    print(f"Timed out after {POLL_TIMEOUT}s waiting for job {job['job_id']}")

Adding More Models

Flux for Photorealism

from diffusers import FluxPipeline

# Flux Schnell (fast): load in bfloat16, then move the pipeline to the GPU
flux = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
)
flux = flux.to("cuda")

# Generate (Flux uses different parameters)
schnell_args = {
    "prompt": "Portrait of a woman with freckles, natural lighting, 35mm film",
    "num_inference_steps": 4,  # Schnell is fast!
    "guidance_scale": 0,  # Schnell doesn't use guidance
    "width": 1024,
    "height": 1024,
}
image = flux(**schnell_args).images[0]

Custom LoRA Models

# Load base + LoRA
# Same loading options as the earlier SDXL examples: fetch the fp16 weight
# variant in safetensors format rather than the default full-precision files.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
).to("cuda")

# Load LoRA weights from a local file
pipe.load_lora_weights("path/to/your-lora.safetensors")

# Or from HuggingFace
# NOTE: the two calls are alternatives — keep only the one you need; running
# both loads both sets of LoRA weights into the pipeline.
pipe.load_lora_weights("username/lora-model-name")

Performance Optimization

Torch Compile

# PyTorch 2.0+ compilation for faster inference.
# The UNet and the VAE decoder are compiled with the same options.
compile_options = {"mode": "reduce-overhead", "fullgraph": True}
pipe.unet = torch.compile(pipe.unet, **compile_options)
pipe.vae.decode = torch.compile(pipe.vae.decode, **compile_options)

# First generation is slow (compilation), subsequent are 20-40% faster

Batch Processing

# Generate multiple images at once: one prompt / negative prompt pair per image
batch_prompts = ["prompt 1", "prompt 2", "prompt 3", "prompt 4"]
batch_negatives = ["neg 1", "neg 2", "neg 3", "neg 4"]

batch_output = pipe(
    prompt=batch_prompts,
    negative_prompt=batch_negatives,
    num_images_per_prompt=1,
)
images = batch_output.images

# More efficient than 4 separate calls

⚠️ Batch Size vs VRAM

Batching increases VRAM usage. On 24GB GPU, batch size 4 for 1024x1024 images is typically the max.

Cost Analysis

Running your own vs Midjourney:

| Service | Cost | Images/Month | Cost/Image |
|---|---|---|---|
| Midjourney Basic | $10/mo | 200 | $0.05 |
| Midjourney Pro | $60/mo | Unlimited* | ~$0.01 |
| GPUBrazil RTX 4090 | $0.40/hr | ~240/hr | $0.0017 |

At scale, running your own is 30x cheaper than Midjourney!

Build Your Image Generation Empire

Generate thousands of images for pennies on GPUBrazil.

Get $5 Free Credit →

Production Checklist

Conclusion

Building your own Midjourney alternative is surprisingly accessible. With SDXL and Flux models plus GPUBrazil's affordable GPUs, you can generate high-quality images at a fraction of the cost of commercial services.

Start with the basic setup, add features as needed, and scale horizontally by adding more GPU workers. The economics get better at scale—at 10,000+ images/month, you're saving thousands compared to API services.