Social networks involve complex many-to-many relationships. Let's explore advanced patterns for analyzing connections and influence.
CREATE TABLE users
(
user_id BIGINT PRIMARY KEY,
username VARCHAR(50) NOT NULL,
email VARCHAR(255) NOT NULL,
first_name VARCHAR(100) NOT NULL,
last_name VARCHAR(100) NOT NULL,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
is_active BOOLEAN NOT NULL DEFAULT TRUE,
UNIQUE KEY uk_users_username (username),
UNIQUE KEY uk_users_email (email),
INDEX idx_users_active (is_active)
) COMMENT = 'User profiles in the social network';
CREATE TABLE connections
(
connection_id BIGINT PRIMARY KEY,
follower_user_id BIGINT NOT NULL,
following_user_id BIGINT NOT NULL,
connection_type ENUM('follow', 'friend', 'block') NOT NULL DEFAULT 'follow',
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
is_active BOOLEAN NOT NULL DEFAULT TRUE,
-- Critical indexes for recursive traversal
INDEX idx_connections_follower_active (follower_user_id, is_active),
INDEX idx_connections_following_active (following_user_id, is_active),
INDEX idx_connections_type_active (connection_type, is_active),
UNIQUE KEY uk_connections_pair (follower_user_id, following_user_id),
FOREIGN KEY (follower_user_id) REFERENCES users(user_id),
FOREIGN KEY (following_user_id) REFERENCES users(user_id)
) COMMENT = 'Connections between users (follows, friendships, etc.)';
This query finds all users within N degrees of separation and calculates influence metrics:
WITH RECURSIVE influence_network AS
(
-- Anchor: Start with target user
SELECT
u.user_id
, u.username
, u.first_name
, u.last_name
, 0 AS degrees_of_separation
, 1.0 AS influence_weight
, ARRAY[u.user_id] AS path_users
, CAST(u.username AS VARCHAR(1000)) AS influence_path
FROM users u
WHERE
u.user_id = 12345 -- Starting user
AND u.is_active = TRUE
UNION ALL
-- Recursive step: Follow connections outward
SELECT
u.user_id
, u.username
, u.first_name
, u.last_name
, inet.degrees_of_separation + 1
, inet.influence_weight * 0.7 AS influence_weight -- Decay influence by distance
, inet.path_users || u.user_id AS path_users
, CONCAT(inet.influence_path, ' -> ', u.username) AS influence_path
FROM influence_network inet
INNER JOIN connections c ON inet.user_id = c.follower_user_id
INNER JOIN users u ON c.following_user_id = u.user_id
WHERE
inet.degrees_of_separation < 3 -- Limit to 3 degrees of separation
AND c.connection_type = 'follow'
AND c.is_active = TRUE
AND u.is_active = TRUE
AND NOT (u.user_id = ANY(inet.path_users)) -- Prevent cycles
)
SELECT
user_id
, username
, first_name
, last_name
, degrees_of_separation
, ROUND(influence_weight, 3) AS influence_score
, influence_path
FROM influence_network
WHERE degrees_of_separation > 0 -- Exclude the starting user
ORDER BY degrees_of_separation, influence_weight DESC
LIMIT 100;
This finds shared connections between two users at various depths:
WITH RECURSIVE user_network AS
(
-- Build network from first user
SELECT
c.following_user_id AS connected_user_id
, 1 AS depth_from_user1
, 'user1' AS source_user
FROM connections c
INNER JOIN users u ON c.following_user_id = u.user_id
WHERE
c.follower_user_id = 12345 -- User 1
AND c.connection_type = 'follow'
AND c.is_active = TRUE
AND u.is_active = TRUE
UNION ALL
-- Extend network from first user (recursive)
SELECT
c.following_user_id
, un.depth_from_user1 + 1
, 'user1'
FROM user_network un
INNER JOIN connections c ON un.connected_user_id = c.follower_user_id
INNER JOIN users u ON c.following_user_id = u.user_id
WHERE
un.depth_from_user1 < 2
AND un.source_user = 'user1'
AND c.connection_type = 'follow'
AND c.is_active = TRUE
AND u.is_active = TRUE
)
, user2_network AS
(
-- Build network from second user
SELECT
c.following_user_id AS connected_user_id
, 1 AS depth_from_user2
FROM connections c
INNER JOIN users u ON c.following_user_id = u.user_id
WHERE
c.follower_user_id = 67890 -- User 2
AND c.connection_type = 'follow'
AND c.is_active = TRUE
AND u.is_active = TRUE
UNION ALL
-- Extend network from second user (recursive)
SELECT
c.following_user_id
, un2.depth_from_user2 + 1
FROM user2_network un2
INNER JOIN connections c ON un2.connected_user_id = c.follower_user_id
INNER JOIN users u ON c.following_user_id = u.user_id
WHERE
un2.depth_from_user2 < 2
AND c.connection_type = 'follow'
AND c.is_active = TRUE
AND u.is_active = TRUE
)
SELECT
u.user_id
, u.username
, u.first_name
, u.last_name
, un1.depth_from_user1
, un2.depth_from_user2
, (un1.depth_from_user1 + un2.depth_from_user2) AS total_distance
FROM user_network un1
INNER JOIN user2_network un2 ON un1.connected_user_id = un2.connected_user_id
INNER JOIN users u ON un1.connected_user_id = u.user_id
WHERE un1.source_user = 'user1'
ORDER BY total_distance, u.username
LIMIT 50;
Manufacturing systems require complex recursive calculations for cost analysis and inventory planning.
CREATE TABLE products
(
product_id BIGINT PRIMARY KEY,
product_code VARCHAR(50) NOT NULL,
name VARCHAR(255) NOT NULL,
description TEXT,
unit_cost DECIMAL(12,4) NOT NULL DEFAULT 0.0000,
labor_cost DECIMAL(12,4) NOT NULL DEFAULT 0.0000,
is_manufactured BOOLEAN NOT NULL DEFAULT FALSE,
is_active BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
UNIQUE KEY uk_products_code (product_code),
INDEX idx_products_manufactured_active (is_manufactured, is_active)
) COMMENT = 'Master product catalog including raw materials and assemblies';
CREATE TABLE bill_of_materials
(
bom_id BIGINT PRIMARY KEY,
parent_product_id BIGINT NOT NULL,
component_product_id BIGINT NOT NULL,
quantity_required DECIMAL(10,4) NOT NULL,
waste_factor DECIMAL(5,4) NOT NULL DEFAULT 1.0000,
is_active BOOLEAN NOT NULL DEFAULT TRUE,
effective_date DATE NOT NULL,
end_date DATE NULL,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- Essential indexes for recursive BOM traversal
INDEX idx_bom_parent_active (parent_product_id, is_active),
INDEX idx_bom_component_active (component_product_id, is_active),
INDEX idx_bom_dates (effective_date, end_date),
FOREIGN KEY (parent_product_id) REFERENCES products(product_id),
FOREIGN KEY (component_product_id) REFERENCES products(product_id)
) COMMENT = 'Bill of materials defining product composition and quantities';
This query calculates total cost including all sub-components and manufacturing overhead:
WITH RECURSIVE bom_explosion AS
(
-- Anchor: Start with target product
SELECT
p.product_id
, p.product_code
, p.name
, p.unit_cost
, p.labor_cost
, p.is_manufactured
, 1.0 AS quantity_needed
, 0 AS level_depth
, p.unit_cost AS raw_material_cost
, p.labor_cost AS direct_labor_cost
, CAST(p.product_code AS VARCHAR(1000)) AS component_path
FROM products p
WHERE
p.product_id = 1001 -- Target finished product
AND p.is_active = TRUE
UNION ALL
-- Recursive step: Explode BOM structure
SELECT
comp.product_id
, comp.product_code
, comp.name
, comp.unit_cost
, comp.labor_cost
, comp.is_manufactured
, bom_exp.quantity_needed * bom.quantity_required * bom.waste_factor AS quantity_needed
, bom_exp.level_depth + 1
, comp.unit_cost AS raw_material_cost
, comp.labor_cost AS direct_labor_cost
, CONCAT(bom_exp.component_path, ' > ', comp.product_code) AS component_path
FROM bom_explosion bom_exp
INNER JOIN bill_of_materials bom ON bom_exp.product_id = bom.parent_product_id
INNER JOIN products comp ON bom.component_product_id = comp.product_id
WHERE
bom_exp.level_depth < 10 -- Prevent infinite recursion
AND bom.is_active = TRUE
AND comp.is_active = TRUE
AND CURRENT_DATE BETWEEN bom.effective_date AND COALESCE(bom.end_date, '2999-12-31')
)
, cost_summary AS
(
SELECT
product_id
, product_code
, name
, level_depth
, quantity_needed
, is_manufactured
, CASE
WHEN is_manufactured = FALSE THEN quantity_needed * raw_material_cost
ELSE 0
END AS total_material_cost
, CASE
WHEN is_manufactured = TRUE THEN quantity_needed * direct_labor_cost
ELSE 0
END AS total_labor_cost
, component_path
FROM bom_explosion
)
SELECT
-- Rollup costs by level for analysis
level_depth
, COUNT(*) AS component_count
, ROUND(SUM(total_material_cost), 2) AS level_material_cost
, ROUND(SUM(total_labor_cost), 2) AS level_labor_cost
, ROUND(SUM(total_material_cost + total_labor_cost), 2) AS level_total_cost
FROM cost_summary
GROUP BY level_depth
UNION ALL
-- Overall totals
SELECT
999 AS level_depth -- Sort to bottom
, COUNT(*) AS component_count
, ROUND(SUM(total_material_cost), 2) AS total_material_cost
, ROUND(SUM(total_labor_cost), 2) AS total_labor_cost
, ROUND(SUM(total_material_cost + total_labor_cost) * 1.15, 2) AS final_cost_with_overhead -- 15% overhead
FROM cost_summary
ORDER BY level_depth;
This identifies the most expensive component paths in the BOM:
WITH RECURSIVE bom_paths AS
(
-- Anchor: Root products
SELECT
p.product_id
, p.product_code
, p.name
, p.unit_cost + p.labor_cost AS total_cost
, 1.0 AS quantity
, 0 AS depth
, p.unit_cost + p.labor_cost AS path_cost
, CAST(p.product_code AS VARCHAR(2000)) AS cost_path
, CAST(p.name AS VARCHAR(2000)) AS readable_path
FROM products p
WHERE
p.product_id = 1001
AND p.is_active = TRUE
UNION ALL
-- Recursive step: Build paths with cumulative costs
SELECT
comp.product_id
, comp.product_code
, comp.name
, comp.unit_cost + comp.labor_cost AS total_cost
, bp.quantity * bom.quantity_required * bom.waste_factor AS quantity
, bp.depth + 1
, bp.path_cost + ((comp.unit_cost + comp.labor_cost) * bom.quantity_required * bom.waste_factor) AS path_cost
, CONCAT(bp.cost_path, ' -> ', comp.product_code) AS cost_path
, CONCAT(bp.readable_path, ' -> ', comp.name) AS readable_path
FROM bom_paths bp
INNER JOIN bill_of_materials bom ON bp.product_id = bom.parent_product_id
INNER JOIN products comp ON bom.component_product_id = comp.product_id
WHERE
bp.depth < 8
AND bom.is_active = TRUE
AND comp.is_active = TRUE
AND CURRENT_DATE BETWEEN bom.effective_date AND COALESCE(bom.end_date, '2999-12-31')
)
SELECT
product_code
, name
, depth
, ROUND(quantity, 4) AS total_quantity_needed
, ROUND(total_cost, 4) AS unit_cost
, ROUND(path_cost, 2) AS cumulative_path_cost
, readable_path
FROM bom_paths
WHERE depth > 0 -- Exclude root
ORDER BY path_cost DESC, depth
LIMIT 20;
Geographic data often involves complex hierarchical relationships with spatial considerations.
CREATE TABLE geographic_regions
(
region_id BIGINT PRIMARY KEY,
region_code VARCHAR(20) NOT NULL,
name VARCHAR(255) NOT NULL,
region_type ENUM('country', 'state', 'county', 'city', 'district') NOT NULL,
parent_region_id BIGINT,
population BIGINT DEFAULT 0,
area_sq_km DECIMAL(12,2) DEFAULT 0,
latitude DECIMAL(10,8),
longitude DECIMAL(11,8),
is_active BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
INDEX idx_regions_parent_type (parent_region_id, region_type),
INDEX idx_regions_type_active (region_type, is_active),
INDEX idx_regions_coordinates (latitude, longitude),
UNIQUE KEY uk_regions_code_type (region_code, region_type),
FOREIGN KEY (parent_region_id) REFERENCES geographic_regions(region_id)
) COMMENT = 'Hierarchical geographic regions with spatial data';
This query provides comprehensive regional analysis with population aggregation:
WITH RECURSIVE regional_hierarchy AS
(
-- Anchor: Start with a specific region (e.g., California)
SELECT
gr.region_id
, gr.region_code
, gr.name
, gr.region_type
, gr.parent_region_id
, gr.population
, gr.area_sq_km
, gr.latitude
, gr.longitude
, 0 AS hierarchy_level
, CAST(gr.name AS VARCHAR(1000)) AS full_path
, gr.population AS total_population
, gr.area_sq_km AS total_area
FROM geographic_regions gr
WHERE
gr.region_id = 500 -- California
AND gr.is_active = TRUE
UNION ALL
-- Recursive step: Include all sub-regions
SELECT
child.region_id
, child.region_code
, child.name
, child.region_type
, child.parent_region_id
, child.population
, child.area_sq_km
, child.latitude
, child.longitude
, rh.hierarchy_level + 1
, CONCAT(rh.full_path, ' > ', child.name) AS full_path
, child.population AS total_population
, child.area_sq_km AS total_area
FROM regional_hierarchy rh
INNER JOIN geographic_regions child ON rh.region_id = child.parent_region_id
WHERE
rh.hierarchy_level < 5
AND child.is_active = TRUE
)
, population_rollup AS
(
SELECT
region_id
, region_code
, name
, region_type
, hierarchy_level
, full_path
, population AS direct_population
, area_sq_km AS direct_area
, latitude
, longitude
, SUM(total_population) OVER (
PARTITION BY region_id
ORDER BY hierarchy_level
ROWS UNBOUNDED PRECEDING
) AS cumulative_population
FROM regional_hierarchy
)
SELECT
pr.region_type
, pr.hierarchy_level
, pr.name
, pr.region_code
, ROUND(pr.direct_population / 1000.0, 1) AS population_thousands
, ROUND(pr.direct_area, 1) AS area_sq_km
, CASE
WHEN pr.direct_area > 0 THEN ROUND(pr.direct_population / pr.direct_area, 2)
ELSE 0
END AS population_density
, pr.full_path
, CASE
WHEN pr.latitude IS NOT NULL AND pr.longitude IS NOT NULL THEN
CONCAT(ROUND(pr.latitude, 4), ', ', ROUND(pr.longitude, 4))
ELSE 'No coordinates'
END AS coordinates
FROM population_rollup pr
ORDER BY pr.hierarchy_level, pr.direct_population DESC;
This combines recursive hierarchy traversal with spatial distance calculations:
WITH RECURSIVE region_tree AS
(
-- Anchor: Start with target region
SELECT
gr.region_id
, gr.region_code
, gr.name
, gr.region_type
, gr.parent_region_id
, gr.latitude
, gr.longitude
, 0 AS relationship_distance
, 'self' AS relationship_type
FROM geographic_regions gr
WHERE
gr.region_id = 750 -- Target region (e.g., Los Angeles County)
AND gr.is_active = TRUE
UNION ALL
-- Recursive step: Get sibling regions (same parent)
SELECT
sibling.region_id
, sibling.region_code
, sibling.name
, sibling.region_type
, sibling.parent_region_id
, sibling.latitude
, sibling.longitude
, rt.relationship_distance + 1
, 'sibling'
FROM region_tree rt
INNER JOIN geographic_regions sibling ON rt.parent_region_id = sibling.parent_region_id
WHERE
rt.relationship_distance = 0
AND sibling.region_id != rt.region_id
AND sibling.is_active = TRUE
UNION ALL
-- Recursive step: Get parent and aunt/uncle regions
SELECT
parent.region_id
, parent.region_code
, parent.name
, parent.region_type
, parent.parent_region_id
, parent.latitude
, parent.longitude
, rt.relationship_distance + 1
, CASE
WHEN rt.relationship_distance = 0 THEN 'parent'
ELSE 'ancestor'
END
FROM region_tree rt
INNER JOIN geographic_regions parent ON rt.parent_region_id = parent.region_id
WHERE
rt.relationship_distance < 2
AND parent.is_active = TRUE
)
, distance_calculations AS
(
SELECT
rt.region_id
, rt.region_code
, rt.name
, rt.region_type
, rt.relationship_distance
, rt.relationship_type
, rt.latitude
, rt.longitude
, target.latitude AS target_lat
, target.longitude AS target_lng
, CASE
WHEN rt.latitude IS NOT NULL
AND rt.longitude IS NOT NULL
AND target.latitude IS NOT NULL
AND target.longitude IS NOT NULL
THEN
-- Haversine formula for distance in kilometers
6371 * ACOS(
COS(RADIANS(target.latitude)) * COS(RADIANS(rt.latitude)) *
COS(RADIANS(rt.longitude) - RADIANS(target.longitude)) +
SIN(RADIANS(target.latitude)) * SIN(RADIANS(rt.latitude))
)
ELSE NULL
END AS distance_km
FROM region_tree rt
CROSS JOIN (
SELECT latitude, longitude
FROM region_tree
WHERE relationship_distance = 0
) target
)
SELECT
region_code
, name
, region_type
, relationship_type
, relationship_distance
, CASE
WHEN distance_km IS NOT NULL THEN ROUND(distance_km, 1)
ELSE NULL
END AS distance_km
, CASE
WHEN latitude IS NOT NULL AND longitude IS NOT NULL THEN
CONCAT(ROUND(latitude, 4), ', ', ROUND(longitude, 4))
ELSE 'No coordinates'
END AS coordinates
FROM distance_calculations
WHERE relationship_distance > 0 -- Exclude self
ORDER BY
CASE relationship_type
WHEN 'parent' THEN 1
WHEN 'sibling' THEN 2
WHEN 'ancestor' THEN 3
ELSE 4
END,
COALESCE(distance_km, 999999),
name;
Complex time-based reporting often requires recursive queries to handle fiscal periods, organizational changes over time, and historical analysis.
CREATE TABLE time_periods
(
period_id BIGINT PRIMARY KEY,
period_code VARCHAR(20) NOT NULL,
period_name VARCHAR(100) NOT NULL,
period_type ENUM('year', 'quarter', 'month', 'week', 'day') NOT NULL,
parent_period_id BIGINT,
start_date DATE NOT NULL,
end_date DATE NOT NULL,
fiscal_year INT NOT NULL,
is_active BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- Critical indexes for temporal recursion
INDEX idx_periods_parent_type (parent_period_id, period_type),
INDEX idx_periods_dates (start_date, end_date),
INDEX idx_periods_fiscal_type (fiscal_year, period_type),
UNIQUE KEY uk_periods_code (period_code),
FOREIGN KEY (parent_period_id) REFERENCES time_periods(period_id)
) COMMENT = 'Hierarchical time periods for financial reporting';
CREATE TABLE sales_data
(
sale_id BIGINT PRIMARY KEY,
period_id BIGINT NOT NULL,
product_category VARCHAR(100) NOT NULL,
revenue DECIMAL(15,2) NOT NULL DEFAULT 0.00,
units_sold INT NOT NULL DEFAULT 0,
transaction_date DATE NOT NULL,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
INDEX idx_sales_period_category (period_id, product_category),
INDEX idx_sales_date (transaction_date),
FOREIGN KEY (period_id) REFERENCES time_periods(period_id)
) COMMENT = 'Sales transaction data by time period';
This query provides comprehensive fiscal analysis with automatic period aggregation:
WITH RECURSIVE fiscal_hierarchy AS
(
-- Anchor: Start with fiscal year 2024
SELECT
tp.period_id
, tp.period_code
, tp.period_name
, tp.period_type
, tp.parent_period_id
, tp.start_date
, tp.end_date
, tp.fiscal_year
, 0 AS hierarchy_depth
, CAST(tp.period_name AS VARCHAR(500)) AS period_path
FROM time_periods tp
WHERE
tp.fiscal_year = 2024
AND tp.period_type = 'year'
AND tp.is_active = TRUE
UNION ALL
-- Recursive step: Get all sub-periods (quarters, months, etc.)
SELECT
child.period_id
, child.period_code
, child.period_name
, child.period_type
, child.parent_period_id
, child.start_date
, child.end_date
, child.fiscal_year
, fh.hierarchy_depth + 1
, CONCAT(fh.period_path, ' > ', child.period_name) AS period_path
FROM fiscal_hierarchy fh
INNER JOIN time_periods child ON fh.period_id = child.parent_period_id
WHERE
fh.hierarchy_depth < 4
AND child.is_active = TRUE
)
, sales_aggregation AS
(
SELECT
fh.period_id
, fh.period_code
, fh.period_name
, fh.period_type
, fh.hierarchy_depth
, fh.start_date
, fh.end_date
, fh.period_path
, COALESCE(SUM(sd.revenue), 0) AS total_revenue
, COALESCE(SUM(sd.units_sold), 0) AS total_units
, COUNT(DISTINCT sd.product_category) AS category_count
, COUNT(sd.sale_id) AS transaction_count
FROM fiscal_hierarchy fh
LEFT JOIN sales_data sd ON fh.period_id = sd.period_id
GROUP BY
fh.period_id, fh.period_code, fh.period_name, fh.period_type,
fh.hierarchy_depth, fh.start_date, fh.end_date, fh.period_path
)
SELECT
period_type
, period_code
, period_name
, hierarchy_depth
, ROUND(total_revenue, 2) AS revenue
, total_units
, category_count
, transaction_count
, CASE
WHEN total_units > 0 THEN ROUND(total_revenue / total_units, 2)
ELSE 0
END AS avg_revenue_per_unit
, ROUND(
100.0 * total_revenue /
LAG(total_revenue) OVER (
PARTITION BY period_type
ORDER BY start_date
), 2
) AS percent_growth
, period_path
FROM sales_aggregation
WHERE total_revenue > 0 -- Only show periods with sales
ORDER BY hierarchy_depth, start_date;
-- For social networks
CREATE INDEX idx_connections_composite ON connections
(follower_user_id, connection_type, is_active, following_user_id);
-- For BOM traversal
CREATE INDEX idx_bom_traversal ON bill_of_materials
(parent_product_id, is_active, effective_date, component_product_id);
-- Always include depth limits to prevent runaway queries
WITH RECURSIVE deep_hierarchy AS (
-- ... anchor query ...
UNION ALL
-- ... recursive query ...
WHERE depth_level < 10 -- Hard limit on recursion depth
)
-- Filter inactive records early in both anchor and recursive parts
WHERE
is_active = TRUE
AND created_at >= '2024-01-01' -- Date range filtering
AND category IN ('A', 'B', 'C') -- Category filtering
-- Use LIMIT in recursive queries processing millions of rows
SELECT ... FROM recursive_cte ORDER BY priority_column LIMIT 1000;
-- Consider breaking very large hierarchies into chunks
WHERE parent_id BETWEEN 1000 AND 2000 -- Process in chunks
Always analyze execution plans for recursive queries:
These advanced examples demonstrate how recursive queries can handle complex real-world scenarios while maintaining performance through proper indexing and query structure. The key is always to start with your data model, ensure proper indexing on recursive columns, and build complexity incrementally while monitoring performance.