Elixir Fault Tolerance & WebSocket Scaling for Casino Games
Master Elixir's fault tolerance, async patterns, and Phoenix Channels for building high-concurrency casino games and real-time applications that scale to millions of concurrent users with 99.99% uptime.
What You'll Master
- OTP supervision trees for bulletproof fault tolerance
- Building real-time casino games with millions of concurrent players
- Phoenix Channels for WebSocket scaling and load balancing
- Async patterns with GenServers, Tasks, and Agents
Why Elixir for High-Stakes Real-Time Applications?
The casino gaming industry demands systems that never fail. When millions of dollars are wagered every minute, downtime isn't just expensive—it's catastrophic. Elixir, built on the battle-tested Erlang Virtual Machine (BEAM), was designed specifically for these requirements.
At Ayulogy, we've architected casino platforms handling 2M+ concurrent players with 99.999% uptime, processing $50M+ in daily transactions. Here's how Elixir's unique features make this possible.
Fault Isolation
Lightweight processes fail independently without affecting the entire system.
Massive Concurrency
Handle millions of lightweight processes with minimal memory overhead.
Hot Code Reloads
Update code without stopping the system, ensuring 24/7 availability.
OTP: The Foundation of Fault Tolerance
Supervision Trees for Casino Game Rooms
OTP (Open Telecom Platform) provides battle-tested patterns for building fault-tolerant systems. For casino games, we use supervision trees to ensure that if one game room crashes, it doesn't affect other players or the entire system.
# Application Supervisor - Top Level
defmodule CasinoApp.Application do
  @moduledoc """
  OTP application callback module that builds and starts the top-level
  supervision tree (`CasinoApp.Supervisor`).
  """

  use Application

  @doc """
  Starts the root supervisor with a `:one_for_one` strategy.

  Children are started in list order, so the registries are listed before the
  supervisors and the endpoint whose processes register or look up names
  through them.
  """
  @impl true
  def start(_type, _args) do
    children = [
      # Database and external services
      CasinoApp.Repo,
      # Process registries. Game rooms, player sessions and tournament
      # leaderboards are addressed through `{:via, Registry, ...}` tuples and
      # `Registry.lookup/2`, which only work while the registry is running.
      # NOTE(review): drop an entry here if that registry is already started
      # elsewhere in the project - starting it twice fails the boot.
      {Registry, keys: :unique, name: CasinoApp.GameRegistry},
      {Registry, keys: :unique, name: CasinoApp.PlayerRegistry},
      {Registry, keys: :unique, name: CasinoApp.TournamentRegistry},
      # Game room supervisor - manages all game rooms
      {DynamicSupervisor, name: CasinoApp.GameRoomSupervisor, strategy: :one_for_one},
      # Player session supervisor - manages player connections
      {DynamicSupervisor, name: CasinoApp.PlayerSupervisor, strategy: :one_for_one},
      # Phoenix Endpoint for WebSocket connections
      CasinoAppWeb.Endpoint,
      # Background job supervisor
      {CasinoApp.JobSupervisor, name: CasinoApp.JobSupervisor}
    ]

    opts = [strategy: :one_for_one, name: CasinoApp.Supervisor]
    Supervisor.start_link(children, opts)
  end
end
# Game Room GenServer - Each poker table, slot machine, etc.
defmodule CasinoApp.GameRoom do
  @moduledoc """
  One process per game room (poker table, slot machine, ...).

  A room registers itself in `CasinoApp.GameRegistry` under its `game_id`,
  with `%{game_type: game_type}` as the registry value so rooms can be selected
  by type. It tracks the joined players and the current game state, and stops
  itself once it has been empty and idle for longer than the idle limit.

  `get_max_players/1`, `validate_action/4`, `broadcast_to_players/3`,
  `send_to_player/3` and `save_game_state/1` are called from this module but
  are not part of this listing.
  """

  use GenServer
  require Logger

  # Interval between two cleanup checks (30 seconds).
  @timeout_ms 30_000
  # An empty room that has been idle for more than this many seconds is stopped.
  @idle_limit_seconds 300

  defstruct [
    :game_id,
    :game_type,
    :players,
    :game_state,
    :max_players,
    :created_at,
    :last_activity
  ]

  @doc """
  Starts a game room process registered under `game_id`.

  The third argument is accepted so existing child specs keep working; it is
  not used.
  """
  def start_link(game_id, game_type, _opts \\ []) do
    GenServer.start_link(__MODULE__, {game_id, game_type}, name: via_tuple(game_id, game_type))
  end

  @impl true
  def init({game_id, game_type}) do
    Logger.info("Starting game room: #{game_id} type: #{game_type}")
    now = DateTime.utc_now()

    state = %__MODULE__{
      game_id: game_id,
      game_type: game_type,
      players: %{},
      game_state: :waiting,
      max_players: get_max_players(game_type),
      created_at: now,
      last_activity: now
    }

    # Schedule cleanup check
    Process.send_after(self(), :cleanup_check, @timeout_ms)
    {:ok, state}
  end

  # Player joins game. Replies `{:ok, new_state}` on success and
  # `{:error, :room_full}` when the player is already seated or the room has
  # reached `max_players`.
  @impl true
  def handle_call({:join_player, player_id, player_data}, _from, state) do
    if can_join?(state, player_id) do
      new_players = Map.put(state.players, player_id, player_data)
      new_state = %{state | players: new_players, last_activity: DateTime.utc_now()}

      # Broadcast to all players
      broadcast_to_players(new_state, "player_joined", %{
        player_id: player_id,
        player_count: map_size(new_players)
      })

      {:reply, {:ok, new_state}, new_state}
    else
      {:reply, {:error, :room_full}, state}
    end
  end

  # Handle player actions (bet, fold, etc.)
  @impl true
  def handle_cast({:player_action, player_id, action, data}, state) do
    case validate_action(state, player_id, action, data) do
      {:ok, new_game_state} ->
        new_state = %{state | game_state: new_game_state, last_activity: DateTime.utc_now()}

        broadcast_to_players(new_state, "game_update", %{
          action: action,
          player_id: player_id,
          game_state: new_game_state
        })

        {:noreply, new_state}

      {:error, reason} ->
        # Only the acting player is told about a rejected action.
        send_to_player(player_id, "action_error", %{reason: reason})
        {:noreply, state}
    end
  end

  # Cleanup inactive rooms: stop when idle, otherwise re-arm the timer.
  @impl true
  def handle_info(:cleanup_check, state) do
    if should_cleanup?(state) do
      Logger.info("Cleaning up inactive game room: #{state.game_id}")
      {:stop, :normal, state}
    else
      Process.send_after(self(), :cleanup_check, @timeout_ms)
      {:noreply, state}
    end
  end

  # Graceful shutdown
  @impl true
  def terminate(reason, state) do
    # `reason` may be a tuple (for example `{:shutdown, term}` or a crash
    # reason with a stacktrace); interpolating it directly would raise, so it
    # is rendered with inspect/1.
    Logger.info("Game room #{state.game_id} shutting down: #{inspect(reason)}")
    # Notify all players
    broadcast_to_players(state, "room_closed", %{reason: reason})
    # Save final game state to database
    save_game_state(state)
    :ok
  end

  # Helper functions

  # Registers the room under `game_id` and stores the game type as the registry
  # value, which is what lets other code select rooms by `game_type`.
  defp via_tuple(game_id, game_type) do
    {:via, Registry, {CasinoApp.GameRegistry, game_id, %{game_type: game_type}}}
  end

  defp can_join?(state, player_id) do
    not Map.has_key?(state.players, player_id) and
      map_size(state.players) < state.max_players
  end

  defp should_cleanup?(state) do
    map_size(state.players) == 0 and
      DateTime.diff(DateTime.utc_now(), state.last_activity, :second) > @idle_limit_seconds
  end
end
Dynamic Game Room Management
Casino platforms need to create and destroy game rooms on-demand based on player activity. We use Dynamic Supervisors to manage this efficiently:
defmodule CasinoApp.GameManager do
  @moduledoc """
  Manages game room lifecycle and player matchmaking

  `generate_game_id/0` and `get_room_info/1` are called from this module but
  are not part of this listing.
  """

  # Logger.info/1 and Logger.error/1 are macros and need an explicit require.
  require Logger

  alias CasinoApp.GameRoom

  @doc """
  Starts a new game room of `game_type` under `CasinoApp.GameRoomSupervisor`.

  Returns `{:ok, game_id, pid}` on success or `{:error, reason}` when the
  supervisor could not start the child.
  """
  def create_game_room(game_type, opts \\ []) do
    game_id = generate_game_id()

    child_spec = %{
      id: game_id,
      start: {GameRoom, :start_link, [game_id, game_type, opts]},
      # Don't restart crashed game rooms
      restart: :temporary
    }

    case DynamicSupervisor.start_child(CasinoApp.GameRoomSupervisor, child_spec) do
      {:ok, pid} ->
        Logger.info("Created game room #{game_id} type #{game_type}")
        {:ok, game_id, pid}

      {:error, reason} ->
        # `reason` is usually a tuple, which cannot be interpolated directly.
        Logger.error("Failed to create game room: #{inspect(reason)}")
        {:error, reason}
    end
  end

  @doc """
  Returns `{:ok, game_id}` for a room of `game_type` that still has a free
  seat, creating a new room when none is found.

  Both paths return the same `{:ok, game_id}` shape; a creation failure is
  passed through as `{:error, reason}`.
  """
  def find_or_create_game(game_type, player_preferences) do
    case find_available_room(game_type, player_preferences) do
      {:ok, game_id} ->
        {:ok, game_id}

      :not_found ->
        with {:ok, game_id, _pid} <- create_game_room(game_type) do
          {:ok, game_id}
        end
    end
  end

  @doc """
  Asks the room registered under `game_id` to seat `player_id`.

  Returns the room's reply, or `{:error, :room_not_found}` when no room is
  registered under that id.
  """
  def join_player(game_id, player_id, player_data) do
    case Registry.lookup(CasinoApp.GameRegistry, game_id) do
      [{pid, _}] ->
        GenServer.call(pid, {:join_player, player_id, player_data})

      [] ->
        {:error, :room_not_found}
    end
  end

  # Find available room with space.
  #
  # Only rooms whose registry value is a map containing this `game_type` are
  # considered, so rooms must be registered with such a value to be found.
  # The preferences argument is not used yet.
  defp find_available_room(game_type, _preferences) do
    # A match spec is an ordinary term, so the value is embedded as-is (no pin).
    match_spec = [{{:"$1", :_, %{game_type: game_type}}, [], [:"$1"]}]

    CasinoApp.GameRegistry
    |> Registry.select(match_spec)
    |> Enum.find_value(:not_found, fn game_id ->
      case get_room_info(game_id) do
        {:ok, %{player_count: count, max_players: max}} when count < max ->
          {:ok, game_id}

        _ ->
          nil
      end
    end)
  end
end
Phoenix Channels: WebSocket Scaling Architecture
Real-Time Communication at Scale
Phoenix Channels provide a high-level abstraction over WebSockets, enabling real-time bidirectional communication. For casino games, this means instant updates for all players when cards are dealt, bets are placed, or jackpots are won.
# Phoenix Channel for Game Rooms
defmodule CasinoAppWeb.GameChannel do
  @moduledoc """
  Channel for `"game:<game_id>"` topics: authenticated join, player actions,
  chat, and removal of a player some time after a disconnect.

  `contains_inappropriate_content?/1` is called from this module but is not
  part of this listing.
  """

  use Phoenix.Channel
  require Logger
  alias CasinoApp.{GameManager, Player, RateLimit}

  # Time a disconnected player has to reconnect before being removed.
  @reconnect_grace_ms 5_000

  # Channel join with authentication and rate limiting
  def join("game:" <> game_id, %{"token" => token}, socket) do
    with {:ok, player} <- authenticate_player(token),
         {:ok, _} <- RateLimit.check_rate_limit(player.id, :channel_join),
         {:ok, game_info} <- GameManager.get_game_info(game_id),
         {:ok, _} <- GameManager.join_player(game_id, player.id, player) do
      # Store player and game info in socket
      socket =
        socket
        |> assign(:player_id, player.id)
        |> assign(:game_id, game_id)
        |> assign(:player, player)

      # push/3 cannot be used inside join/3 because the socket has not joined
      # yet, so the initial state is sent from handle_info/2 once join returns.
      send(self(), {:after_join, game_info})

      Logger.info("Player #{player.id} joined game #{game_id}")
      {:ok, socket}
    else
      {:error, :rate_limited} ->
        {:error, %{reason: "rate_limited", retry_after: 60}}

      {:error, :unauthorized} ->
        {:error, %{reason: "unauthorized"}}

      # The game room replies :room_full; :game_full is kept for other callers.
      {:error, reason} when reason in [:game_full, :room_full] ->
        {:error, %{reason: "game_full"}}

      {:error, reason} ->
        Logger.warning("Failed to join game #{game_id}: #{inspect(reason)}")
        {:error, %{reason: "join_failed"}}
    end
  end

  # Handle player actions
  def handle_in("player_action", %{"action" => action, "data" => data}, socket) do
    %{player_id: player_id, game_id: game_id} = socket.assigns

    with {:ok, _} <- RateLimit.check_rate_limit(player_id, :game_action),
         {:ok, _} <- validate_action_data(action, data),
         :ok <- GameManager.player_action(game_id, player_id, action, data) do
      {:noreply, socket}
    else
      {:error, :rate_limited} ->
        push(socket, "error", %{message: "Too many actions, slow down"})
        {:noreply, socket}

      {:error, :invalid_action} ->
        push(socket, "error", %{message: "Invalid action"})
        {:noreply, socket}

      {:error, reason} ->
        push(socket, "error", %{message: "Action failed: #{format_reason(reason)}"})
        {:noreply, socket}
    end
  end

  # Handle chat messages
  def handle_in("chat_message", %{"message" => message}, socket) do
    %{player_id: player_id, game_id: game_id, player: player} = socket.assigns

    with {:ok, _} <- RateLimit.check_rate_limit(player_id, :chat),
         {:ok, clean_message} <- sanitize_message(message) do
      # Broadcast to all players in the game
      CasinoAppWeb.Endpoint.broadcast("game:#{game_id}", "chat_message", %{
        player_id: player_id,
        player_name: player.name,
        message: clean_message,
        timestamp: DateTime.utc_now()
      })

      {:noreply, socket}
    else
      {:error, :rate_limited} ->
        push(socket, "error", %{message: "Slow down your messages"})
        {:noreply, socket}

      {:error, :inappropriate_content} ->
        push(socket, "error", %{message: "Message blocked"})
        {:noreply, socket}
    end
  end

  # Handle disconnections
  def terminate(reason, socket) do
    %{player_id: player_id, game_id: game_id} = socket.assigns

    # Channel exit reasons are frequently tuples such as `{:shutdown, :closed}`;
    # interpolating one directly would raise, so inspect/1 is used.
    Logger.info("Player #{player_id} disconnected from game #{game_id}: #{inspect(reason)}")

    # Remove player from game (with grace period for reconnection). This runs
    # in a separate process so the channel itself can exit immediately.
    Task.start(fn ->
      Process.sleep(@reconnect_grace_ms)

      if not player_reconnected?(player_id, game_id) do
        GameManager.remove_player(game_id, player_id)
      end
    end)

    :ok
  end

  # Sends the initial game state once the join has completed.
  def handle_info({:after_join, game_info}, socket) do
    push(socket, "game_state", game_info)
    {:noreply, socket}
  end

  # Presence tracking for active players
  def handle_info(%{event: "presence_diff"}, socket) do
    # Handle player presence changes (join/leave)
    {:noreply, socket}
  end

  # Private helper functions

  # Collapses every verification failure into `{:error, :unauthorized}`.
  defp authenticate_player(token) do
    case Player.verify_token(token) do
      {:ok, player} -> {:ok, player}
      _ -> {:error, :unauthorized}
    end
  end

  defp validate_action_data(_action, data) do
    # Validate action data based on game rules
    # This would contain game-specific validation
    {:ok, data}
  end

  defp sanitize_message(message) do
    # Content filtering and sanitization: truncate first, then strip markup.
    clean_message =
      message
      |> String.slice(0, 200)
      |> HtmlSanitizeEx.strip_tags()

    if contains_inappropriate_content?(clean_message) do
      {:error, :inappropriate_content}
    else
      {:ok, clean_message}
    end
  end

  defp player_reconnected?(player_id, game_id) do
    # Check if player has reconnected within grace period
    case Registry.lookup(CasinoApp.PlayerRegistry, player_id) do
      [{_pid, %{game_id: ^game_id}}] -> true
      _ -> false
    end
  end

  # Renders an error reason for a client-facing message without raising on
  # terms (tuples, maps, ...) that do not implement String.Chars.
  defp format_reason(reason) when is_binary(reason), do: reason
  defp format_reason(reason) when is_atom(reason), do: Atom.to_string(reason)
  defp format_reason(reason), do: inspect(reason)
end
Production Performance Metrics
Our casino platform handles 2M+ concurrent WebSocket connections with an average latency of 15ms globally. The system processes 50,000+ game actions per second across 100,000+ active game rooms.
Async Patterns for High-Performance Gaming
Task-Based Async Processing
Casino games require complex calculations that shouldn't block the main game loop. Elixir's Task module provides excellent async capabilities for handling these computations.
defmodule CasinoApp.GameEngine do
  @moduledoc """
  Async game processing engine for complex calculations

  `update_player_balances/1`, `update_player_balance/2`, `calculate_multiplier/1`,
  `check_bonus_trigger/1`, `notify_jackpot_milestone/2`, `validate_outcome/1`
  and `record_game_history/3` are called from this module but are not part of
  this listing.
  """

  # Logger.error/1 is a macro and needs an explicit require.
  require Logger

  alias CasinoApp.{Poker, Blackjack, Slots, Analytics}

  # Jackpot amount at which a milestone notification is sent.
  @jackpot_milestone 1_000_000

  @doc """
  Evaluates every player's hand concurrently and returns the winner info.

  `players_hands` is an enumerable of `{player_id, hand}`. Each evaluation has
  up to 5 seconds; `Task.await_many/2` exits the caller on timeout. Balance
  updates and analytics run afterwards in a separate, unlinked task whose
  result is not awaited.
  """
  def evaluate_poker_hand(game_id, players_hands) do
    # Start async tasks for each player's hand
    tasks =
      Enum.map(players_hands, fn {player_id, hand} ->
        Task.async(fn -> {player_id, Poker.evaluate_hand(hand)} end)
      end)

    # Wait for all evaluations with timeout
    results = Task.await_many(tasks, 5_000)

    # Determine winner(s) and distribute winnings
    winner_info = Poker.determine_winner(results)

    # Update game state and player balances asynchronously
    Task.start(fn ->
      update_player_balances(winner_info)
      Analytics.record_game_completion(game_id, winner_info)
    end)

    winner_info
  end

  @doc """
  Spins a slot machine in a task and returns the `Task` struct; the caller is
  expected to await it to obtain the result map.
  """
  def spin_slots(player_id, bet_amount, machine_config) do
    Task.async(fn ->
      # Seed this task's :rand state from the crypto generator so the draws
      # below are cryptographically strong instead of using the default PRNG.
      # Requires the :crypto application to be available.
      :crypto.rand_seed()

      # Generate random outcomes for each reel
      reels = Enum.map(machine_config.reels, fn reel -> Enum.random(reel.symbols) end)

      # Calculate payout based on symbol combinations
      payout = Slots.calculate_payout(reels, bet_amount, machine_config.paytable)

      # Record spin for analytics and compliance
      Analytics.record_slot_spin(player_id, bet_amount, reels, payout)

      %{
        reels: reels,
        payout: payout,
        symbols: reels,
        multiplier: calculate_multiplier(reels),
        bonus_triggered: check_bonus_trigger(reels)
      }
    end)
  end

  @doc """
  Plays out the dealer's hand in a task and settles every player against it.

  `player_results` is an enumerable of `{player_id, player_cards, bet}`.
  Returns the `Task` struct; its result holds the dealer's final cards and
  value plus one `{player_id, result_map}` entry per player. The game id
  argument is currently unused.
  """
  def play_dealer_hand(_game_id, dealer_cards, player_results) do
    Task.async(fn ->
      final_cards = Blackjack.play_dealer(dealer_cards)
      dealer_value = Blackjack.calculate_hand_value(final_cards)

      # Determine winners against dealer
      game_results =
        Enum.map(player_results, fn {player_id, player_cards, bet} ->
          player_value = Blackjack.calculate_hand_value(player_cards)
          result = Blackjack.determine_outcome(player_value, dealer_value)
          payout = Blackjack.calculate_payout(bet, result)

          {player_id,
           %{
             result: result,
             payout: payout,
             player_value: player_value,
             dealer_value: dealer_value
           }}
        end)

      # Update balances asynchronously
      Task.start(fn ->
        Enum.each(game_results, fn {player_id, result} ->
          update_player_balance(player_id, result.payout)
        end)
      end)

      %{
        dealer_cards: final_cards,
        dealer_value: dealer_value,
        game_results: game_results
      }
    end)
  end

  @doc """
  Adds `contribution_amount` to the jackpot of `machine_id` and returns the new
  amount.

  The update runs inside the `CasinoApp.ProgressiveJackpots` agent, so
  concurrent contributions are applied one at a time.
  """
  def update_progressive_jackpot(machine_id, contribution_amount) do
    Agent.get_and_update(CasinoApp.ProgressiveJackpots, fn jackpots ->
      current_amount = Map.get(jackpots, machine_id, 0)
      new_amount = current_amount + contribution_amount

      # Notify only when this contribution crosses the milestone; comparing
      # just the new amount would notify again on every later contribution.
      if current_amount < @jackpot_milestone and new_amount >= @jackpot_milestone do
        Task.start(fn -> notify_jackpot_milestone(machine_id, new_amount) end)
      end

      {new_amount, Map.put(jackpots, machine_id, new_amount)}
    end)
  end

  @doc """
  Processes a list of game outcomes in chunks of 100, one task per chunk, and
  returns the per-outcome results in input order.

  All chunks must finish within 10 seconds or the caller exits.
  """
  def process_game_batch(game_outcomes) when is_list(game_outcomes) do
    # Process in chunks to avoid overwhelming the system
    game_outcomes
    |> Enum.chunk_every(100)
    |> Enum.map(fn chunk ->
      Task.async(fn -> Enum.map(chunk, &process_single_outcome/1) end)
    end)
    |> Task.await_many(10_000)
    # Join the per-chunk lists one level deep so a list-shaped result of a
    # single outcome is kept intact.
    |> Enum.concat()
  end

  # Returns :ok, or the first non-:ok step result (after logging it).
  defp process_single_outcome(%{game_id: game_id, player_id: player_id, outcome: outcome}) do
    with :ok <- validate_outcome(outcome),
         :ok <- update_player_balance(player_id, outcome.payout),
         :ok <- record_game_history(game_id, player_id, outcome) do
      :ok
    else
      error ->
        Logger.error("Failed to process outcome for player #{player_id}: #{inspect(error)}")
        error
    end
  end
end
Agent-Based State Management
For shared state that needs atomic updates (like progressive jackpots or tournament leaderboards), Elixir's Agent provides a simple, thread-safe solution:
defmodule CasinoApp.TournamentLeaderboard do
  @moduledoc """
  Agent holding the leaderboard of a single tournament.

  The agent is registered in `CasinoApp.TournamentRegistry` under the
  tournament id. Its state is a map with the tournament id, a `players` map
  (`player_id => %{score, updated_at}`) and a `last_updated` timestamp.

  `rank_improved?/2` is called from this module but is not part of this
  listing.
  """

  use Agent
  alias CasinoApp.Tournament

  @doc "Starts an empty leaderboard for `tournament_id`."
  def start_link(tournament_id) do
    initial_state = fn ->
      %{
        tournament_id: tournament_id,
        players: %{},
        last_updated: DateTime.utc_now()
      }
    end

    Agent.start_link(initial_state, name: via_tuple(tournament_id))
  end

  @doc """
  Records `new_score` for `player_id` when it beats the stored score (a player
  without an entry counts as 0).

  Returns `:updated` when the score was stored and `:no_change` otherwise.
  """
  def update_player_score(tournament_id, player_id, new_score) do
    tournament_id
    |> via_tuple()
    |> Agent.get_and_update(&apply_score(&1, tournament_id, player_id, new_score))
  end

  @doc """
  Returns up to `limit` `{player_id, data}` entries, highest score first.
  """
  def get_top_players(tournament_id, limit \\ 10) do
    Agent.get(via_tuple(tournament_id), fn board ->
      board.players
      |> Enum.sort_by(fn {_player, entry} -> entry.score end, :desc)
      |> Enum.take(limit)
    end)
  end

  @doc """
  Returns `{:ok, rank}` (1-based, by descending score) or `{:error, :not_found}`
  when the player has no entry.
  """
  def get_player_rank(tournament_id, player_id) do
    Agent.get(via_tuple(tournament_id), fn board ->
      board.players
      |> Enum.sort_by(fn {_player, entry} -> entry.score end, :desc)
      |> Enum.with_index(1)
      |> Enum.find_value({:error, :not_found}, fn {{id, _entry}, position} ->
        if id == player_id, do: {:ok, position}
      end)
    end)
  end

  # Runs inside the agent: computes the `{reply, new_state}` pair for a score
  # submission.
  defp apply_score(board, tournament_id, player_id, new_score) do
    previous_score = get_in(board.players, [player_id, :score]) || 0

    if new_score > previous_score do
      entry = %{score: new_score, updated_at: DateTime.utc_now()}
      players = Map.put(board.players, player_id, entry)
      updated_board = %{board | players: players, last_updated: DateTime.utc_now()}

      # The rank check reads from this same agent, so it runs in its own
      # process and is served after the current update has returned.
      Task.start(fn ->
        check_rank_changes(tournament_id, player_id, previous_score, new_score)
      end)

      {:updated, updated_board}
    else
      {:no_change, board}
    end
  end

  defp via_tuple(tournament_id) do
    {:via, Registry, {CasinoApp.TournamentRegistry, tournament_id}}
  end

  # Looks up the player's current rank and notifies them when
  # `rank_improved?/2` says so.
  defp check_rank_changes(tournament_id, player_id, old_score, new_score) do
    tournament_id
    |> get_player_rank(player_id)
    |> maybe_notify(player_id, old_score, new_score)
  end

  defp maybe_notify({:ok, new_rank}, player_id, old_score, new_score) do
    if rank_improved?(old_score, new_score) do
      Tournament.notify_rank_change(player_id, new_rank)
    end
  end

  defp maybe_notify(_no_rank, _player_id, _old_score, _new_score), do: :ok
end
Load Balancing & Horizontal Scaling
Multi-Node Phoenix Channels
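Scaling Phoenix Channels across multiple nodes requires careful coordination. We use Phoenix.PubSub with the Redis adapter for cross-node communication and hash-based assignment of game rooms to nodes (a simple hash-modulo scheme rather than a full consistent-hash ring) for game room distribution.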
# config/prod.exs - Multi-node Phoenix setup
# Endpoint settings: public URL, allowed WebSocket origins and the digested
# static asset manifest.
# NOTE(review): the URL advertises port 80 while check_origin only allows an
# https origin - confirm the intended scheme/port.
config :casino_app, CasinoAppWeb.Endpoint,
url: [host: "casino.example.com", port: 80],
check_origin: ["https://casino.example.com"],
cache_static_manifest: "priv/static/cache_manifest.json"
# Redis-backed PubSub for cross-node communication
# NOTE(review): System.get_env/1 runs when this file is evaluated; for
# config/prod.exs that is presumably at build time rather than at boot -
# confirm, or move this entry to config/runtime.exs.
# NOTE(review): verify these option names against the Redis PubSub adapter
# version in use.
config :casino_app, CasinoApp.PubSub,
adapter: Phoenix.PubSub.Redis,
redis_url: System.get_env("REDIS_URL"),
pool_size: 10
# Hash-based game room distribution across nodes
defmodule CasinoApp.NodeManager do
  @moduledoc """
  Assigns game rooms to cluster nodes by hashing the game id, and routes
  messages to the node on which a player process is registered.

  The assignment is a plain hash-modulo over the currently connected nodes, so
  it changes for many game ids whenever a node joins or leaves.
  """

  # Logger.warning/1 is a macro and needs an explicit require.
  require Logger

  @doc """
  Returns the node responsible for `game_id`.

  The node list is sorted before indexing: `[Node.self() | Node.list()]` puts
  the local node first on every node, so without sorting two nodes would
  disagree about the owner of the same game.
  """
  def get_node_for_game(game_id) do
    nodes = Enum.sort([Node.self() | Node.list()])
    Enum.at(nodes, :erlang.phash2(game_id, length(nodes)))
  end

  @doc """
  Creates a room of `game_type` on the node selected for `game_id`.

  Returns whatever `CasinoApp.GameManager.create_game_room/1` returns; for a
  remote node a failed call yields `{:badrpc, reason}` from `:rpc.call/4`.

  NOTE(review): `game_id` is only used to pick the node - it is not passed to
  `create_game_room/1`, so the created room may get a different id. Confirm
  this is intended.
  """
  def start_game_on_node(game_id, game_type) do
    target_node = get_node_for_game(game_id)

    if target_node == Node.self() do
      # Start locally
      CasinoApp.GameManager.create_game_room(game_type)
    else
      # Start on remote node
      :rpc.call(target_node, CasinoApp.GameManager, :create_game_room, [game_type])
    end
  end

  @doc """
  Sends `{message, data}` to the process registered for `player_id`, wherever
  in the cluster it lives.

  Returns the result of `send_local_message/3` for a local player, `true`
  (from `:rpc.cast/4`) for a remote one, or `{:error, :not_found}`.
  """
  # Cross-node player messaging
  def send_to_player(player_id, message, data) do
    local_node = Node.self()

    case find_player_node(player_id) do
      {:ok, ^local_node} ->
        # Local player
        send_local_message(player_id, message, data)

      {:ok, remote_node} ->
        # Remote player
        :rpc.cast(remote_node, __MODULE__, :send_local_message, [player_id, message, data])

      {:error, :not_found} ->
        Logger.warning("Player #{player_id} not found on any node")
        {:error, :not_found}
    end
  end

  @doc """
  Sends `{message, data}` to the player process registered on this node.

  Public because remote nodes invoke it through `:rpc.cast/4`. Returns `:ok`
  or `{:error, :not_found}`.
  """
  def send_local_message(player_id, message, data) do
    case Registry.lookup(CasinoApp.PlayerRegistry, player_id) do
      [{pid, _}] ->
        send(pid, {message, data})
        :ok

      [] ->
        {:error, :not_found}
    end
  end

  defp find_player_node(player_id) do
    # Check local node first
    case Registry.lookup(CasinoApp.PlayerRegistry, player_id) do
      [{_pid, _}] ->
        {:ok, Node.self()}

      [] ->
        # Check remote nodes
        find_on_remote_nodes(player_id, Node.list())
    end
  end

  # Asks each remote node in turn; one synchronous rpc per node.
  defp find_on_remote_nodes(_player_id, []), do: {:error, :not_found}

  defp find_on_remote_nodes(player_id, [node | rest]) do
    case :rpc.call(node, Registry, :lookup, [CasinoApp.PlayerRegistry, player_id]) do
      [{_pid, _}] -> {:ok, node}
      # Not registered there, or the call failed ({:badrpc, _}): try the next node.
      _ -> find_on_remote_nodes(player_id, rest)
    end
  end
end
# Load balancer configuration
defmodule CasinoApp.LoadBalancer do
  @moduledoc """
  Distributes WebSocket connections across available nodes

  `get_cpu_usage/0`, `count_game_rooms/0` and `report_health/1` are called from
  this module but are not part of this listing.
  """

  @doc """
  Returns the WebSocket endpoint URL for `region`, falling back to the global
  endpoint for any region that is not listed.
  """
  def get_endpoint_for_region(region) do
    case region do
      "us-east" -> "wss://us-east.casino.example.com/socket"
      "us-west" -> "wss://us-west.casino.example.com/socket"
      "eu" -> "wss://eu.casino.example.com/socket"
      "asia" -> "wss://asia.casino.example.com/socket"
      # Default
      _ -> "wss://casino.example.com/socket"
    end
  end

  @doc """
  Collects this node's stats (connections, total memory, CPU, game rooms) and
  passes them to `report_health/1`.
  """
  def health_check do
    node_stats = %{
      node: Node.self(),
      active_connections: count_active_connections(),
      memory_usage: :erlang.memory(:total),
      cpu_usage: get_cpu_usage(),
      game_rooms: count_game_rooms()
    }

    # Report to load balancer
    report_health(node_stats)
  end

  # Counts the entries tracked under the "players" topic.
  #
  # Phoenix.Tracker.list/2 takes the tracker name and the topic; piping a node
  # name in as an extra first argument called a function that does not exist.
  # NOTE(review): assumes CasinoApp.Presence is the registered tracker name -
  # confirm against the Presence setup.
  defp count_active_connections do
    CasinoApp.Presence
    |> Phoenix.Tracker.list("players")
    |> length()
  end
end
Real-World Performance Optimization
Memory and Process Optimization
Running millions of concurrent processes requires careful memory management and process optimization. Here are the techniques we use in production:
Production Performance Metrics
Metric | Before Optimization | After Optimization | Improvement |
---|---|---|---|
Memory per game room | 2.5MB | 180KB | 92% reduction |
WebSocket connections | 500K max | 2.1M max | 320% increase |
Message latency | 45ms avg | 15ms avg | 67% improvement |
CPU usage (8 cores) | 85% avg | 35% avg | 59% reduction |
Process Pool Management
defmodule CasinoApp.ProcessPool do
  @moduledoc """
  Manages pools of worker processes for different game operations

  Supervises four named `Task.Supervisor`s (card evaluation, database writes,
  external API calls, analytics) under a `:one_for_one` strategy.

  `handle_action/3`, `cleanup_game/1` and `cleanup_inactive_game/1` are called
  from this module but are not part of this listing.
  """

  use Supervisor

  # Without these aliases the bare names would resolve to top-level modules
  # (`Poker`, ...) instead of the application's namespaced ones.
  # NOTE(review): assumes the modules live at CasinoApp.Poker, CasinoApp.Blackjack
  # and CasinoApp.Baccarat - confirm for Baccarat.
  alias CasinoApp.{Poker, Blackjack, Baccarat}

  # A lightweight game process stops after this long without a message.
  @inactivity_timeout_ms 300_000

  def start_link(init_arg) do
    Supervisor.start_link(__MODULE__, init_arg, name: __MODULE__)
  end

  @impl true
  def init(_init_arg) do
    children = [
      # Pool for card evaluation (CPU intensive)
      {Task.Supervisor, name: CasinoApp.CardEvaluationPool},
      # Pool for database operations
      {Task.Supervisor, name: CasinoApp.DatabasePool},
      # Pool for external API calls (payment processing, etc.)
      {Task.Supervisor, name: CasinoApp.ExternalApiPool},
      # Dedicated pool for analytics
      {Task.Supervisor, name: CasinoApp.AnalyticsPool}
    ]

    Supervisor.init(children, strategy: :one_for_one)
  end

  @doc """
  Evaluates `cards` for `game_type` (`:poker`, `:blackjack` or `:baccarat`) in
  the card evaluation pool and returns the result.

  Waits up to 5 seconds; `Task.await/2` exits the caller on timeout.
  """
  # Execute CPU-intensive card evaluation
  def evaluate_cards(cards, game_type) do
    CasinoApp.CardEvaluationPool
    |> Task.Supervisor.async_nolink(fn ->
      case game_type do
        :poker -> Poker.evaluate_hand(cards)
        :blackjack -> Blackjack.calculate_hand_value(cards)
        :baccarat -> Baccarat.calculate_score(cards)
      end
    end)
    |> Task.await(5_000)
  end

  @doc """
  Inserts `data` through `CasinoApp.Repo` in a supervised task without waiting
  for the result. Returns what `Task.Supervisor.start_child/2` returns.
  """
  # Non-blocking database operations
  def async_db_write(data) do
    Task.Supervisor.start_child(CasinoApp.DatabasePool, fn ->
      CasinoApp.Repo.insert(data)
    end)
  end

  @doc """
  Spawns a linked process running the game loop for `game_id` and returns its
  pid.
  """
  # Memory-efficient process spawning
  def start_lightweight_game_process(game_id, initial_state) do
    # Use minimal process dictionary and selective receive
    spawn_link(fn ->
      Process.put(:game_id, game_id)
      # Optimize for small heap
      Process.flag(:min_heap_size, 233)
      game_loop(initial_state)
    end)
  end

  defp game_loop(state) do
    receive do
      {:player_action, player_id, action} ->
        new_state = handle_action(state, player_id, action)
        game_loop(new_state)

      {:get_state, from} ->
        send(from, {:state, state})
        game_loop(state)

      :stop ->
        cleanup_game(state)

      _unexpected ->
        # Drop messages this loop does not understand; left unmatched they
        # would pile up in the mailbox for the life of the process. Note that
        # any message, including these, restarts the inactivity timer.
        game_loop(state)
    after
      # 5 minute timeout
      @inactivity_timeout_ms ->
        cleanup_inactive_game(state)
    end
  end
end
Monitoring and Observability
Casino platforms require comprehensive monitoring due to regulatory requirements and the high-stakes nature of the business. We use Elixir's built-in tools plus custom metrics:
defmodule CasinoApp.Monitoring do
  @moduledoc """
  Periodically collects system, application and business metrics, forwards
  them to `send_metrics/1` and logs alerts when thresholds are exceeded.

  The `count_*` and `get_*` metric helpers and `send_metrics/1` are called from
  this module but are not part of this listing.
  """

  use GenServer
  require Logger

  # Collection interval (30 seconds).
  @metrics_interval 30_000
  # Total VM memory above which an alert is logged (8GB).
  @memory_alert_bytes 8_000_000_000

  def start_link(_opts) do
    GenServer.start_link(__MODULE__, %{}, name: __MODULE__)
  end

  @impl true
  def init(state) do
    schedule_metrics_collection()
    {:ok, state}
  end

  @impl true
  def handle_info(:collect_metrics, state) do
    collect_and_send_metrics()
    # Re-arm only after collection has finished, so runs never overlap.
    schedule_metrics_collection()
    {:noreply, state}
  end

  defp collect_and_send_metrics do
    metrics = %{
      # System metrics
      # :erlang.memory/0 returns a keyword list (total, processes, ...).
      memory_usage: :erlang.memory(),
      process_count: :erlang.system_info(:process_count),
      # Samples scheduler utilization over 1 second, blocking this process
      # for that long.
      scheduler_utilization: :scheduler.utilization(1),
      # Application metrics
      active_games: count_active_games(),
      connected_players: count_connected_players(),
      messages_per_second: get_message_rate(),
      # Business metrics
      total_bets_per_minute: get_betting_rate(),
      average_session_duration: get_avg_session_duration(),
      revenue_per_minute: get_revenue_rate(),
      # Error rates
      game_crashes_per_hour: get_crash_rate(),
      failed_connections_per_minute: get_connection_failure_rate()
    }

    # Send to monitoring service (DataDog, New Relic, etc.)
    send_metrics(metrics)
    # Check for alerts
    check_alerts(metrics)
  end

  # Every threshold is checked independently so one alert never hides another.
  defp check_alerts(metrics) do
    # memory_usage is a keyword list, so the total is fetched by key.
    total_memory = Keyword.fetch!(metrics.memory_usage, :total)

    if total_memory > @memory_alert_bytes do
      Logger.error("High memory usage: #{total_memory}")
    end

    # Unusually low traffic
    if metrics.messages_per_second < 1000 do
      Logger.warning("Low message rate: #{metrics.messages_per_second}")
    end

    if metrics.game_crashes_per_hour > 10 do
      Logger.error("High game crash rate: #{metrics.game_crashes_per_hour}")
    end

    :ok
  end

  defp schedule_metrics_collection do
    Process.send_after(self(), :collect_metrics, @metrics_interval)
  end
end
Ready to Build Fault-Tolerant Real-Time Applications?
Ayulogy specializes in building mission-critical Elixir applications that handle millions of concurrent users with 99.99% uptime. From casino platforms to real-time trading systems, we deliver solutions that never fail.