Why Build Your Own?
Midjourney charges $10-120/month with generation limits. Running your own image generation API lets you:
- Unlimited generations: Pay only for GPU time
- Full control: Use any model, any style, any size
- Custom models: Train on your own data
- No content filters: Generate what you need
- White-label: Offer as your own product
💡 What you'll build
A production-ready image generation API supporting SDXL, Flux, and custom models with queue management, webhooks, and auto-scaling.
Architecture Overview
A production image generation system has these components:
- API Server: Receives requests, returns job IDs
- Queue: Redis for job management
- GPU Workers: Process generation requests
- Storage: S3/R2 for generated images
- CDN: Serve images globally
Setup
Spin up a GPU instance on GPUBrazil:
- RTX 4090 (24GB): $0.40/hr — Runs SDXL, most models
- L40S (48GB): $0.79/hr — Multiple models simultaneously
# Install dependencies
pip install torch diffusers transformers accelerate
# xformers backs the enable_xformers_memory_efficient_attention() calls used
# by the generation script and the worker; without it those calls raise.
pip install xformers
pip install fastapi uvicorn redis rq
# requests is imported directly by the worker (webhooks) and the example client
pip install boto3 pillow requests
# For Flux models
pip install sentencepiece protobuf
Basic Image Generation
import torch
from diffusers import StableDiffusionXLPipeline, DPMSolverMultistepScheduler
# Load the SDXL base checkpoint in half precision from safetensors weights
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    use_safetensors=True,
    variant="fp16",
    torch_dtype=torch.float16,
)
pipe.to("cuda")

# Speed-ups: swap in the DPM-Solver multistep scheduler (built from the
# pipeline's existing scheduler config) and enable xformers attention
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_xformers_memory_efficient_attention()

# Describe the generation once, then run it
generation_args = {
    "prompt": "A majestic lion in a cyberpunk city, neon lights, rain, 8k, detailed",
    "negative_prompt": "blurry, low quality, distorted",
    "num_inference_steps": 25,
    "guidance_scale": 7.5,
    "width": 1024,
    "height": 1024,
}
result = pipe(**generation_args)

# The pipeline returns a list of images; keep the first and write it to disk
image = result.images[0]
image.save("output.png")
Production API Server
# api.py
import uuid
from datetime import datetime
from typing import Optional

import boto3
from fastapi import BackgroundTasks, FastAPI, HTTPException
from pydantic import BaseModel, Field
from redis import Redis
from rq import Queue
# Image storage settings: target bucket and the CDN base URL
BUCKET = "your-image-bucket"
CDN_URL = "https://cdn.yourdomain.com"

# Web application object that the route decorators below attach to
app = FastAPI(title="Image Generation API")

# Redis connection and the RQ queue ("image_gen") that jobs are pushed onto
redis_conn = Redis(host="localhost", port=6379)
queue = Queue("image_gen", connection=redis_conn)

# S3 client, created without explicit credentials or region
s3 = boto3.client("s3")
class GenerateRequest(BaseModel):
    """Request body accepted by ``POST /generate``.

    Only ``prompt`` is required; every other field has a default.
    """

    # Text prompt, 1-2000 characters.
    prompt: str = Field(..., min_length=1, max_length=2000)
    negative_prompt: str = ""
    # Key of the pipeline the worker should use.
    model: str = "sdxl"
    # The SDXL pipeline raises on sizes that are not divisible by 8, so reject
    # them here with a validation error instead of failing later in the worker.
    width: int = Field(default=1024, ge=512, le=2048, multiple_of=8)
    height: int = Field(default=1024, ge=512, le=2048, multiple_of=8)
    steps: int = Field(default=25, ge=10, le=50)
    guidance_scale: float = Field(default=7.5, ge=1, le=20)
    # Optional fields: declared Optional so an explicit JSON ``null`` is valid
    # (a bare ``int``/``str`` annotation rejects it under Pydantic v2).
    seed: Optional[int] = None
    webhook_url: Optional[str] = None
class JobResponse(BaseModel):
    """Response body returned by ``POST /generate`` once a job is submitted."""

    # Identifier the client later passes to ``GET /job/{job_id}``.
    job_id: str
    # Job state at submission time; the submit endpoint sets it to "queued".
    status: str
    # Wait estimate derived from the queue length by the submit endpoint
    # (its inline comment suggests the unit is seconds).
    estimated_time: int
class JobResult(BaseModel):
    """Response body returned by ``GET /job/{job_id}``."""

    job_id: str
    status: str
    # The fields below are absent until the job finishes. The status endpoint
    # passes ``None`` for them explicitly, so they must be Optional: a bare
    # ``str`` annotation fails validation on ``None`` under Pydantic v2.
    image_url: Optional[str] = None
    error: Optional[str] = None
    # ISO-8601 timestamp recorded when the job was submitted.
    created_at: str
    completed_at: Optional[str] = None
# Store job metadata
# NOTE: this dict is private to the API process. An RQ worker runs in a
# separate process, so anything it writes to its own copy of this dict never
# shows up here; job progress is therefore read back from RQ itself.
jobs_db = {}

# Seconds RQ keeps a finished job's return value (the image URL) in Redis.
_RESULT_TTL_SECONDS = 24 * 60 * 60

# RQ job status -> status string exposed by this API.
_RQ_TO_API_STATUS = {
    "queued": "queued",
    "deferred": "queued",
    "scheduled": "queued",
    "started": "processing",
    "finished": "completed",
    "failed": "failed",
    "stopped": "failed",
    "canceled": "failed",
}
_TERMINAL_STATUSES = ("completed", "failed")


def _sync_job_from_rq(job_id: str, record: dict) -> dict:
    """Refresh ``record`` in place from the RQ job with the same id.

    Records already in a terminal state are returned untouched, as are
    records whose RQ job can no longer be found (e.g. it expired).
    """
    from rq.exceptions import NoSuchJobError
    from rq.job import Job

    if record["status"] in _TERMINAL_STATUSES:
        return record

    try:
        rq_job = Job.fetch(job_id, connection=redis_conn)
    except NoSuchJobError:
        return record

    # get_status() may return a plain string or a str-based enum depending on
    # the RQ version; normalise to the plain string before the dict lookup.
    raw_status = rq_job.get_status()
    rq_status = str(getattr(raw_status, "value", raw_status))
    api_status = _RQ_TO_API_STATUS.get(rq_status)
    if api_status is None:
        return record

    record["status"] = api_status
    if api_status == "completed":
        # The worker function returns the image URL; RQ stores that value.
        return_value = getattr(rq_job, "return_value", None)
        record["image_url"] = (
            return_value() if callable(return_value) else rq_job.result
        )
    elif api_status == "failed":
        # Expose only the final line of the traceback, not the whole stack.
        trace = (rq_job.exc_info or "").strip()
        record["error"] = trace.splitlines()[-1] if trace else f"Job {rq_status}"

    if api_status in _TERMINAL_STATUSES and rq_job.ended_at is not None:
        record["completed_at"] = rq_job.ended_at.isoformat()
    return record


@app.post("/generate", response_model=JobResponse)
async def generate_image(request: GenerateRequest):
    """Submit an image generation job.

    Records the job, enqueues ``worker.generate_image`` under the same id so
    its state can be looked up in RQ later, and returns the id with a rough
    wait estimate.

    Raises:
        HTTPException: 503 if the job cannot be enqueued because Redis fails.
    """
    from redis.exceptions import RedisError

    job_id = str(uuid.uuid4())
    params = request.dict()

    # Store job info
    jobs_db[job_id] = {
        "status": "queued",
        "request": params,
        "created_at": datetime.utcnow().isoformat(),
    }

    # Queue the job. The keyword ``job_id`` names the RQ job; the positional
    # ``job_id`` is the first argument handed to the worker function.
    try:
        queue.enqueue(
            'worker.generate_image',
            job_id,
            params,
            job_id=job_id,
            job_timeout=300,
            result_ttl=_RESULT_TTL_SECONDS,
        )
    except RedisError as exc:
        # Do not leave behind a record for a job that was never queued.
        jobs_db.pop(job_id, None)
        raise HTTPException(status_code=503, detail="Job queue unavailable") from exc

    # Estimate wait time based on queue length
    queue_length = len(queue)
    estimated_time = queue_length * 15 + 10  # ~15 sec per image

    return JobResponse(
        job_id=job_id,
        status="queued",
        estimated_time=estimated_time,
    )


@app.get("/job/{job_id}", response_model=JobResult)
async def get_job(job_id: str):
    """Get job status and result.

    Raises:
        HTTPException: 404 if this API process has no record of ``job_id``.
    """
    record = jobs_db.get(job_id)
    if record is None:
        raise HTTPException(status_code=404, detail="Job not found")

    job = _sync_job_from_rq(job_id, record)
    return JobResult(
        job_id=job_id,
        status=job["status"],
        image_url=job.get("image_url"),
        error=job.get("error"),
        created_at=job["created_at"],
        completed_at=job.get("completed_at"),
    )
@app.get("/health")
async def health():
    """Report liveness, the number of queued jobs, and the number of workers."""
    from rq import Worker

    return {
        "status": "healthy",
        "queue_length": len(queue),
        # queue.count is the number of queued jobs (same as len(queue)), not
        # workers; count the workers listening on this queue instead.
        "workers": Worker.count(queue=queue),
    }
GPU Worker
# worker.py
import torch
from diffusers import (
StableDiffusionXLPipeline,
FluxPipeline,
DPMSolverMultistepScheduler
)
import boto3
import requests
from PIL import Image
import io
from datetime import datetime
# Initialize S3
s3 = boto3.client('s3')
BUCKET = 'your-image-bucket'
CDN_URL = 'https://cdn.yourdomain.com'

# Load models on worker startup
models = {}


def load_models():
    """Load the generation pipelines onto the GPU and register them in ``models``.

    Called once at import time (below) so the pipelines are ready before the
    first job is processed.
    """
    import logging

    # SDXL
    sdxl = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
        variant="fp16",
    ).to("cuda")
    try:
        sdxl.enable_xformers_memory_efficient_attention()
    except ModuleNotFoundError:
        # xformers is an optional package; without it diffusers raises here.
        # Keep the pipeline's default attention rather than crashing the
        # worker while this module is being imported.
        logging.getLogger(__name__).warning(
            "xformers is not installed; using default attention"
        )
    models['sdxl'] = sdxl

    # Flux (optional, needs more VRAM)
    # models['flux'] = FluxPipeline.from_pretrained(
    #     "black-forest-labs/FLUX.1-schnell",
    #     torch_dtype=torch.bfloat16,
    # ).to("cuda")


load_models()
def _update_job_record(job_id, **fields):
    """Best-effort mirror of job state into the API module's ``jobs_db`` dict.

    When this function runs in a separate worker process, ``jobs_db`` is that
    process's own (initially empty) copy, so the entry may not exist yet;
    ``setdefault`` avoids the ``KeyError`` that would otherwise fail every job.
    The durable outcome of a job is its return value or raised exception,
    which RQ records.
    """
    from api import jobs_db  # Import shared state

    jobs_db.setdefault(job_id, {}).update(fields)


def _notify_webhook(url, payload):
    """POST ``payload`` as JSON to ``url``; delivery failures are logged, not raised."""
    import logging

    if not url:
        return
    # NOTE(security): ``url`` comes straight from the API request. Validate or
    # allow-list it before deploying, otherwise callers can make this worker
    # issue requests to internal addresses.
    try:
        requests.post(url, json=payload, timeout=10)
    except requests.RequestException as exc:
        logging.getLogger(__name__).warning(
            "Webhook delivery to %s failed: %s", url, exc
        )


def generate_image(job_id: str, params: dict):
    """Worker function to generate image.

    Runs the requested pipeline, uploads the PNG to S3 and returns its CDN URL.

    Args:
        job_id: Identifier of the job; also used for the S3 object name.
        params: Generation parameters (``prompt`` is required; ``model``,
            ``negative_prompt``, ``width``, ``height``, ``steps``,
            ``guidance_scale``, ``seed`` and ``webhook_url`` are optional).

    Returns:
        The public URL of the generated image.

    Raises:
        ValueError: If ``params["model"]`` names a pipeline that is not loaded.
        Exception: Any generation or upload error is re-raised after the
            failure has been recorded and the webhook notified.
    """
    webhook_url = params.get("webhook_url")

    try:
        _update_job_record(job_id, status="processing")

        # Select model
        model_name = params.get("model", "sdxl")
        pipe = models.get(model_name)
        if pipe is None:
            raise ValueError(f"Unknown model: {model_name}")

        # Set seed for reproducibility. Compare against None so that a seed
        # of 0 is honoured instead of being treated as "no seed".
        seed = params.get("seed")
        generator = None
        if seed is not None:
            generator = torch.Generator("cuda").manual_seed(seed)

        # Generate
        image = pipe(
            prompt=params["prompt"],
            negative_prompt=params.get("negative_prompt", ""),
            width=params.get("width", 1024),
            height=params.get("height", 1024),
            num_inference_steps=params.get("steps", 25),
            guidance_scale=params.get("guidance_scale", 7.5),
            generator=generator,
        ).images[0]

        # Upload to S3
        buffer = io.BytesIO()
        image.save(buffer, format="PNG", optimize=True)
        buffer.seek(0)
        s3_key = f"generated/{job_id}.png"
        s3.upload_fileobj(
            buffer,
            BUCKET,
            s3_key,
            ExtraArgs={'ContentType': 'image/png'},
        )
        image_url = f"{CDN_URL}/{s3_key}"
    except Exception as e:
        _update_job_record(
            job_id,
            status="failed",
            error=str(e),
            completed_at=datetime.utcnow().isoformat(),
        )
        _notify_webhook(webhook_url, {
            "job_id": job_id,
            "status": "failed",
            "error": str(e),
        })
        raise

    # The success bookkeeping sits outside the try block so that a failing
    # webhook can no longer flip an already-uploaded image to "failed".
    _update_job_record(
        job_id,
        status="completed",
        image_url=image_url,
        completed_at=datetime.utcnow().isoformat(),
    )
    # Send webhook if configured
    _notify_webhook(webhook_url, {
        "job_id": job_id,
        "status": "completed",
        "image_url": image_url,
    })
    return image_url
Running the System
# Terminal 1: Redis
redis-server
# Terminal 2: API Server
uvicorn api:app --host 0.0.0.0 --port 8000
# Terminal 3: Worker(s)
# SimpleWorker executes jobs inside the worker process instead of forking a
# child per job, so worker.py (and its model loading) is imported once rather
# than again for every job.
rq worker --worker-class rq.worker.SimpleWorker image_gen --with-scheduler
Client Usage
import requests
import time
API_URL = "http://your-server:8000"
REQUEST_TIMEOUT = 10   # seconds allowed for each HTTP call
POLL_INTERVAL = 2      # seconds between status checks
MAX_WAIT = 600         # stop polling after this many seconds

# Submit generation request
response = requests.post(
    f"{API_URL}/generate",
    json={
        "prompt": "A serene Japanese garden with cherry blossoms, koi pond, traditional bridge, soft morning light",
        "negative_prompt": "ugly, blurry, low quality",
        "width": 1024,
        "height": 1024,
        "steps": 30,
    },
    timeout=REQUEST_TIMEOUT,
)
# Surface validation/server errors here rather than as a KeyError below
response.raise_for_status()
job = response.json()
print(f"Job ID: {job['job_id']}")
print(f"Estimated time: {job['estimated_time']}s")

# Poll for result, giving up once the deadline passes
deadline = time.monotonic() + MAX_WAIT
while True:
    poll = requests.get(f"{API_URL}/job/{job['job_id']}", timeout=REQUEST_TIMEOUT)
    poll.raise_for_status()
    result = poll.json()
    if result['status'] == 'completed':
        print(f"Image URL: {result['image_url']}")
        break
    elif result['status'] == 'failed':
        print(f"Error: {result['error']}")
        break
    if time.monotonic() >= deadline:
        print(f"Timed out after {MAX_WAIT}s waiting for job {job['job_id']}")
        break
    time.sleep(POLL_INTERVAL)
Adding More Models
Flux for Photorealism
from diffusers import FluxPipeline
# Flux Schnell (the fast variant), loaded in bfloat16 and moved to the GPU
flux = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16
)
flux = flux.to("cuda")

# Flux takes different settings from SDXL
flux_args = {
    "prompt": "Portrait of a woman with freckles, natural lighting, 35mm film",
    "num_inference_steps": 4,  # Schnell needs only a few steps
    "guidance_scale": 0,       # Schnell does not use guidance
    "width": 1024,
    "height": 1024,
}
flux_result = flux(**flux_args)
image = flux_result.images[0]
Custom LoRA Models
# Start from the SDXL base pipeline in half precision on the GPU
sdxl_base = "stabilityai/stable-diffusion-xl-base-1.0"
pipe = StableDiffusionXLPipeline.from_pretrained(sdxl_base, torch_dtype=torch.float16)
pipe = pipe.to("cuda")

# Attach LoRA weights from a local safetensors file
pipe.load_lora_weights("path/to/your-lora.safetensors")

# Or from a HuggingFace repository id
pipe.load_lora_weights("username/lora-model-name")
Performance Optimization
Torch Compile
# Compile the UNet and the VAE decoder with the same settings (PyTorch 2.0+)
compile_options = {"mode": "reduce-overhead", "fullgraph": True}
pipe.unet = torch.compile(pipe.unet, **compile_options)
pipe.vae.decode = torch.compile(pipe.vae.decode, **compile_options)
# The first generation pays the compilation cost; later ones are 20-40% faster
Batch Processing
# Run four prompts through the pipeline in a single call
prompts = [f"prompt {i}" for i in range(1, 5)]
negative_prompts = [f"neg {i}" for i in range(1, 5)]
batch = pipe(
    prompt=prompts,
    negative_prompt=negative_prompts,
    num_images_per_prompt=1,
)
images = batch.images
# One batched call is more efficient than four separate calls
⚠️ Batch Size vs VRAM
Batching increases VRAM usage. On 24GB GPU, batch size 4 for 1024x1024 images is typically the max.
Cost Analysis
Running your own vs Midjourney:
| Service | Cost | Images/Month | Cost/Image |
|---|---|---|---|
| Midjourney Basic | $10/mo | 200 | $0.05 |
| Midjourney Pro | $60/mo | Unlimited* | ~$0.01 |
| GPUBrazil RTX 4090 | $0.40/hr | ~240/hr | $0.0017 |
At scale, running your own is roughly 30x cheaper per image than Midjourney Basic ($0.05 vs. ~$0.0017) and about 6x cheaper than Midjourney Pro!
Build Your Image Generation Empire
Generate thousands of images for pennies on GPUBrazil.
Get $5 Free Credit →
Production Checklist
- ✅ Redis persistence for queue durability
- ✅ Multiple workers for parallel processing
- ✅ S3/R2 for image storage with CDN
- ✅ Rate limiting per API key
- ✅ Webhook support for async notifications
- ✅ NSFW filtering (if needed)
- ✅ Prometheus metrics for monitoring
- ✅ Auto-scaling based on queue depth
Conclusion
Building your own Midjourney alternative is surprisingly accessible. With SDXL and Flux models plus GPUBrazil's affordable GPUs, you can generate high-quality images at a fraction of the cost of commercial services.
Start with the basic setup, add features as needed, and scale horizontally by adding more GPU workers. The economics get better at scale—at 10,000+ images/month, you're saving thousands compared to API services.