You've mastered the fundamentals and intermediate techniques. Now we enter the realm of window function expertise - where SQL becomes a powerful analytical programming language. These advanced patterns solve problems that traditionally required specialized tools or complex application code.
Sophisticated CLV calculation using statistical modeling and behavioral analysis:
-- PostgreSQL: Advanced CLV with churn prediction
WITH customer_behavior_metrics AS (
SELECT
customer_id,
order_date,
order_amount,
-- Behavioral indicators
ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY order_date) as order_sequence,
COUNT(*) OVER (PARTITION BY customer_id) as total_lifetime_orders,
-- Time-based patterns
order_date - LAG(order_date) OVER (
PARTITION BY customer_id ORDER BY order_date
) as days_between_orders,
AVG(order_date - LAG(order_date) OVER (
PARTITION BY customer_id ORDER BY order_date
)) OVER (PARTITION BY customer_id) as avg_order_frequency_days,
-- Exponentially weighted moving average for trend detection
SUM(order_amount * POWER(0.9, ROW_NUMBER() OVER (
PARTITION BY customer_id ORDER BY order_date DESC
) - 1)) OVER (
PARTITION BY customer_id ORDER BY order_date
ROWS UNBOUNDED PRECEDING
) / SUM(POWER(0.9, ROW_NUMBER() OVER (
PARTITION BY customer_id ORDER BY order_date DESC
) - 1)) OVER (
PARTITION BY customer_id ORDER BY order_date
ROWS UNBOUNDED PRECEDING
) as exponential_moving_avg_value
FROM orders
WHERE order_date >= CURRENT_DATE - INTERVAL '3 years'
),
customer_segments AS (
SELECT
customer_id,
-- Recency (days since last order)
CURRENT_DATE - MAX(order_date) as days_since_last_order,
-- Frequency (annual order rate)
COUNT(*) * 365.0 / (MAX(order_date) - MIN(order_date) + 1) as annual_order_frequency,
-- Monetary (trend-adjusted average)
AVG(exponential_moving_avg_value) as trend_adjusted_avg_value,
-- Behavioral consistency
STDDEV(order_amount) / NULLIF(AVG(order_amount), 0) as spending_volatility,
-- Value trend (regression slope)
REGR_SLOPE(order_amount, EXTRACT(EPOCH FROM order_date)) as spending_trend,
-- Calculate churn probability using behavioral signals
CASE
WHEN MAX(order_date) < CURRENT_DATE - INTERVAL '1 year' THEN 0.9
WHEN MAX(order_date) < CURRENT_DATE - INTERVAL '6 months' THEN 0.6
WHEN MAX(order_date) < CURRENT_DATE - INTERVAL '3 months' THEN 0.3
ELSE 0.1
END * (1 - LEAST(COUNT(*) / 20.0, 1)) as churn_probability
FROM customer_behavior_metrics
GROUP BY customer_id
),
clv_calculations AS (
SELECT
customer_id,
days_since_last_order,
annual_order_frequency,
trend_adjusted_avg_value,
spending_trend,
churn_probability,
-- Multi-factor CLV prediction
(trend_adjusted_avg_value * annual_order_frequency * 0.30 * -- 30% gross margin
CASE
WHEN churn_probability < 0.2 THEN 3.5 -- 3.5 year expected lifespan
WHEN churn_probability < 0.5 THEN 2.0 -- 2 year lifespan
ELSE 1.0 -- 1 year lifespan
END) * (1 - churn_probability) as base_predicted_clv,
-- Trend-adjusted CLV (accounts for spending trajectory)
CASE
WHEN spending_trend > 0 THEN
(trend_adjusted_avg_value * annual_order_frequency * 0.30 * 2.5) *
(1 - churn_probability) * (1 + spending_trend * 200)
ELSE
(trend_adjusted_avg_value * annual_order_frequency * 0.30 * 1.8) *
(1 - churn_probability) * GREATEST(1 + spending_trend * 200, 0.4)
END as trend_adjusted_clv
FROM customer_segments
)
SELECT
customer_id,
ROUND(base_predicted_clv, 2) as predicted_clv,
ROUND(trend_adjusted_clv, 2) as trend_adjusted_clv,
ROUND(churn_probability, 3) as churn_probability,
-- Advanced segmentation
CASE
WHEN trend_adjusted_clv >= PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY trend_adjusted_clv) OVER()
AND churn_probability < 0.3 THEN 'Champions'
WHEN trend_adjusted_clv >= PERCENTILE_CONT(0.80) WITHIN GROUP (ORDER BY trend_adjusted_clv) OVER()
AND days_since_last_order < 90 THEN 'Loyal Customers'
WHEN trend_adjusted_clv >= PERCENTILE_CONT(0.80) WITHIN GROUP (ORDER BY trend_adjusted_clv) OVER()
AND days_since_last_order >= 90 THEN 'Potential Loyalists'
WHEN churn_probability > 0.6 AND base_predicted_clv > 1000 THEN 'Cannot Lose'
WHEN churn_probability > 0.8 THEN 'At Risk'
WHEN annual_order_frequency < 2 AND days_since_last_order > 180 THEN 'Hibernating'
ELSE 'Developing'
END as customer_segment,
-- Marketing investment priority
ROUND(
(trend_adjusted_clv / 500.0) * (1 - churn_probability) *
CASE WHEN days_since_last_order < 30 THEN 1.5 ELSE 1.0 END, 2
) as marketing_priority_score
FROM clv_calculations
ORDER BY trend_adjusted_clv DESC;
Advanced anomaly detection using multiple statistical methods:
-- Oracle: Multi-layered anomaly detection system
WITH time_series_base AS (
SELECT
metric_date,
metric_value,
-- Multi-window moving statistics
AVG(metric_value) OVER (
ORDER BY metric_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as ma_7_day,
STDDEV(metric_value) OVER (
ORDER BY metric_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as stddev_7_day,
AVG(metric_value) OVER (
ORDER BY metric_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as ma_30_day,
STDDEV(metric_value) OVER (
ORDER BY metric_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as stddev_30_day,
-- Seasonal baseline (same day of week historically)
AVG(metric_value) OVER (
PARTITION BY TO_CHAR(metric_date, 'D') -- Day of week
ORDER BY metric_date
ROWS BETWEEN 60 PRECEDING AND CURRENT ROW
) as seasonal_dow_baseline,
STDDEV(metric_value) OVER (
PARTITION BY TO_CHAR(metric_date, 'D')
ORDER BY metric_date
ROWS BETWEEN 60 PRECEDING AND CURRENT ROW
) as seasonal_dow_stddev
FROM daily_metrics
WHERE metric_date >= SYSDATE - 180 -- 6 months
),
statistical_analysis AS (
SELECT
tsb.*,
-- Multiple Z-score calculations
CASE
WHEN stddev_7_day > 0 THEN (metric_value - ma_7_day) / stddev_7_day
ELSE 0
END as z_score_7_day,
CASE
WHEN stddev_30_day > 0 THEN (metric_value - ma_30_day) / stddev_30_day
ELSE 0
END as z_score_30_day,
CASE
WHEN seasonal_dow_stddev > 0 THEN (metric_value - seasonal_dow_baseline) / seasonal_dow_stddev
ELSE 0
END as seasonal_z_score,
-- Percentage deviations
(metric_value - ma_7_day) / NULLIF(ma_7_day, 0) * 100 as pct_dev_7_day,
(metric_value - seasonal_dow_baseline) / NULLIF(seasonal_dow_baseline, 0) * 100 as seasonal_pct_dev,
-- Trend analysis
REGR_SLOPE(metric_value, TO_NUMBER(metric_date - DATE '1970-01-01')) OVER (
ORDER BY metric_date
ROWS BETWEEN 13 PRECEDING AND CURRENT ROW -- 2-week trend
) as trend_slope_14_day,
-- Volatility and momentum
STDDEV(metric_value - LAG(metric_value, 1) OVER (ORDER BY metric_date)) OVER (
ORDER BY metric_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as volatility_7_day,
(metric_value - LAG(metric_value, 1) OVER (ORDER BY metric_date)) /
NULLIF(LAG(metric_value, 1) OVER (ORDER BY metric_date), 0) * 100 as day_over_day_pct
FROM time_series_base tsb
),
anomaly_scoring AS (
SELECT
sa.*,
-- Composite anomaly score (0-5 scale)
(CASE WHEN ABS(z_score_7_day) > 2.5 THEN 1 ELSE 0 END +
CASE WHEN ABS(z_score_30_day) > 2 THEN 1 ELSE 0 END +
CASE WHEN ABS(seasonal_z_score) > 2.5 THEN 1 ELSE 0 END +
CASE WHEN ABS(pct_dev_7_day) > 30 THEN 1 ELSE 0 END +
CASE WHEN ABS(day_over_day_pct) > 50 THEN 1 ELSE 0 END) as anomaly_score,
-- Anomaly classification with severity
CASE
WHEN ABS(z_score_7_day) > 3.5 OR ABS(seasonal_z_score) > 3.5 THEN 'Critical Anomaly'
WHEN ABS(z_score_7_day) > 2.5 OR ABS(pct_dev_7_day) > 40 THEN 'Severe Anomaly'
WHEN ABS(z_score_7_day) > 2 OR ABS(pct_dev_7_day) > 25 THEN 'Moderate Anomaly'
WHEN ABS(z_score_7_day) > 1.5 OR ABS(pct_dev_7_day) > 15 THEN 'Mild Anomaly'
ELSE 'Normal'
END as anomaly_level,
-- Anomaly type classification
CASE
WHEN z_score_7_day > 2.5 AND seasonal_z_score > 2 THEN 'Compound Positive Spike'
WHEN z_score_7_day < -2.5 AND seasonal_z_score < -2 THEN 'Compound Negative Drop'
WHEN z_score_7_day > 2.5 THEN 'Statistical Positive Spike'
WHEN z_score_7_day < -2.5 THEN 'Statistical Negative Drop'
WHEN seasonal_z_score > 2.5 THEN 'Seasonal Positive Anomaly'
WHEN seasonal_z_score < -2.5 THEN 'Seasonal Negative Anomaly'
WHEN ABS(trend_slope_14_day) > 0.15 THEN 'Trend Anomaly'
ELSE 'Normal Variation'
END as anomaly_type
FROM statistical_analysis sa
),
contextual_analysis AS (
SELECT
ans.*,
-- Context about anomaly clusters
COUNT(CASE WHEN anomaly_score >= 3 THEN 1 END) OVER (
ORDER BY metric_date
ROWS BETWEEN 6 PRECEDING AND 6 FOLLOWING
) as nearby_severe_anomalies,
-- Previous anomaly information
LAG(anomaly_level, 1) OVER (ORDER BY metric_date) as prev_anomaly_level,
LAG(metric_date, 1) OVER (
PARTITION BY CASE WHEN anomaly_score >= 2 THEN 1 ELSE 0 END
ORDER BY metric_date
) as prev_anomaly_date,
-- Recovery pattern analysis
CASE
WHEN anomaly_score >= 3 AND
LEAD(anomaly_score, 1) OVER (ORDER BY metric_date) < 2 AND
LEAD(anomaly_score, 2) OVER (ORDER BY metric_date) < 2 THEN 'Quick Recovery'
WHEN anomaly_score >= 3 AND
LEAD(anomaly_score, 1) OVER (ORDER BY metric_date) >= 2 THEN 'Slow Recovery'
WHEN anomaly_score >= 3 THEN 'Recovery Pending'
ELSE 'N/A'
END as recovery_pattern,
-- Forecast next day's expected range
ma_7_day + (2 * stddev_7_day) as upper_forecast_bound,
ma_7_day - (2 * stddev_7_day) as lower_forecast_bound
FROM anomaly_scoring ans
)
SELECT
metric_date,
metric_value,
ROUND(ma_7_day, 2) as seven_day_average,
ROUND(z_score_7_day, 2) as statistical_z_score,
ROUND(seasonal_z_score, 2) as seasonal_z_score,
anomaly_score,
anomaly_level,
anomaly_type,
nearby_severe_anomalies,
recovery_pattern,
ROUND(upper_forecast_bound, 2) as forecast_upper_bound,
ROUND(lower_forecast_bound, 2) as forecast_lower_bound,
-- Actionable recommendations
CASE
WHEN anomaly_score >= 4 THEN 'IMMEDIATE ESCALATION - Multiple anomaly indicators'
WHEN anomaly_level = 'Critical Anomaly' THEN 'URGENT INVESTIGATION - Statistical significance > 3.5σ'
WHEN anomaly_score = 3 AND nearby_severe_anomalies > 2 THEN 'PATTERN ALERT - Clustered anomalies detected'
WHEN anomaly_level = 'Severe Anomaly' THEN 'HIGH PRIORITY - Investigate root cause'
WHEN anomaly_level = 'Moderate Anomaly' THEN 'MONITOR - Validate against business events'
ELSE 'NORMAL - Continue standard monitoring'
END as recommended_action,
-- Business impact assessment
CASE
WHEN anomaly_type LIKE '%Negative%' AND ABS(pct_dev_7_day) > 40 THEN 'HIGH NEGATIVE IMPACT'
WHEN anomaly_type LIKE '%Positive%' AND ABS(pct_dev_7_day) > 40 THEN 'HIGH POSITIVE IMPACT'
WHEN ABS(pct_dev_7_day) > 25 THEN 'MODERATE IMPACT'
ELSE 'LOW IMPACT'
END as business_impact
FROM contextual_analysis
WHERE metric_date >= SYSDATE - 30 -- Last 30 days
ORDER BY anomaly_score DESC, metric_date DESC;
Sophisticated cohort analysis with machine learning-style retention prediction:
-- SQL Server: Advanced cohort analysis with retention modeling
WITH customer_first_orders AS (
SELECT
customer_id,
MIN(order_date) as first_order_date,
DATEFROMPARTS(YEAR(MIN(order_date)), MONTH(MIN(order_date)), 1) as cohort_month,
MIN(order_amount) as first_order_amount,
COUNT(*) OVER (PARTITION BY DATEFROMPARTS(YEAR(MIN(order_date)), MONTH(MIN(order_date)), 1)) as cohort_size
FROM orders
WHERE order_date >= '2022-01-01'
GROUP BY customer_id
),
customer_orders_enhanced AS (
SELECT
o.customer_id,
o.order_date,
o.order_amount,
cfo.cohort_month,
cfo.first_order_amount,
cfo.cohort_size,
DATEDIFF(month, cfo.first_order_date, o.order_date) as months_since_first_order,
ROW_NUMBER() OVER (PARTITION BY o.customer_id ORDER BY o.order_date) as customer_order_sequence,
-- Customer behavior indicators
o.order_amount / cfo.first_order_amount as order_value_ratio,
CASE
WHEN ROW_NUMBER() OVER (PARTITION BY o.customer_id ORDER BY o.order_date) = 1 THEN 0
ELSE DATEDIFF(day,
LAG(o.order_date) OVER (PARTITION BY o.customer_id ORDER BY o.order_date),
o.order_date
)
END as days_since_prev_order
FROM orders o
JOIN customer_first_orders cfo ON o.customer_id = cfo.customer_id
WHERE o.order_date >= '2022-01-01'
),
cohort_behavioral_metrics AS (
SELECT
cohort_month,
months_since_first_order,
cohort_size,
-- Standard cohort metrics
COUNT(DISTINCT customer_id) as active_customers,
COUNT(*) as total_orders,
SUM(order_amount) as revenue,
AVG(order_amount) as avg_order_value,
COUNT(DISTINCT customer_id) * 100.0 / cohort_size as retention_rate,
-- Advanced behavioral metrics
AVG(order_value_ratio) as avg_order_value_evolution,
AVG(days_since_prev_order) as avg_purchase_interval,
AVG(customer_order_sequence) as avg_order_sequence,
STDEV(order_amount) as order_value_volatility,
-- Customer engagement indicators
COUNT(CASE WHEN customer_order_sequence = 1 THEN 1 END) as new_customer_orders,
COUNT(CASE WHEN customer_order_sequence > 1 THEN 1 END) as repeat_customer_orders,
COUNT(CASE WHEN order_value_ratio > 1.5 THEN 1 END) as high_value_orders,
COUNT(CASE WHEN days_since_prev_order <= 30 THEN 1 END) as frequent_orders
FROM customer_orders_enhanced
GROUP BY cohort_month, months_since_first_order, cohort_size
),
cohort_predictive_analysis AS (
SELECT
cbm.*,
-- Retention momentum analysis
retention_rate - LAG(retention_rate, 1) OVER (
PARTITION BY cohort_month ORDER BY months_since_first_order
) as retention_change_mom,
-- Revenue per customer trends
revenue / cohort_size as revenue_per_original_customer,
(revenue / cohort_size) - LAG(revenue / cohort_size, 1) OVER (
PARTITION BY cohort_month ORDER BY months_since_first_order
) as revenue_change_per_customer,
-- Cross-cohort benchmark comparisons
AVG(retention_rate) OVER (
PARTITION BY months_since_first_order
ORDER BY cohort_month
ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
) as benchmark_retention_rate,
-- Predictive retention modeling using weighted moving average
CASE
WHEN months_since_first_order >= 3 THEN
(retention_rate * 0.5 +
LAG(retention_rate, 1) OVER (
PARTITION BY cohort_month ORDER BY months_since_first_order
) * 0.3 +
LAG(retention_rate, 2) OVER (
PARTITION BY cohort_month ORDER BY months_since_first_order
) * 0.2) *
-- Apply decay factor based on months since first order
POWER(0.95, months_since_first_order - 3)
END as predicted_next_period_retention,
-- Long-term value prediction
CASE
WHEN months_since_first_order <= 12 AND retention_rate > 10 THEN
(revenue / cohort_size) *
(1 + (0.15 * (12 - months_since_first_order) * (retention_rate / 100.0)))
END as projected_12_month_ltv_per_customer
FROM cohort_behavioral_metrics cbm
),
cohort_quality_assessment AS (
SELECT
cpa.*,
-- Comprehensive cohort health score (0-100)
CASE
WHEN months_since_first_order = 0 THEN 50 -- Baseline for new cohorts
ELSE
LEAST(100, GREATEST(0,
(retention_rate * 0.4) + -- 40% weight on retention
(CASE WHEN avg_order_value_evolution > 1 THEN 25 ELSE 0 END) + -- Value growth bonus
(CASE WHEN revenue_per_original_customer > benchmark_retention_rate * 50 THEN 20 ELSE 0 END) + -- Revenue performance
(CASE WHEN retention_change_mom > -5 THEN 15 ELSE 0 END) -- Retention stability
))
END as cohort_health_score,
-- Strategic classification
CASE
WHEN months_since_first_order <= 1 THEN 'New Cohort'
WHEN retention_rate > benchmark_retention_rate * 1.2 AND avg_order_value_evolution > 1.1 THEN 'Star Performer'
WHEN retention_rate > benchmark_retention_rate AND revenue_per_original_customer > benchmark_retention_rate * 40 THEN 'High Value'
WHEN retention_rate > benchmark_retention_rate * 0.8 AND retention_change_mom > -2 THEN 'Stable'
WHEN retention_rate < benchmark_retention_rate * 0.6 OR retention_change_mom < -10 THEN 'At Risk'
ELSE 'Average'
END as cohort_classification,
-- Investment recommendation
CASE
WHEN projected_12_month_ltv_per_customer > revenue_per_original_customer * 1.8
AND predicted_next_period_retention > retention_rate THEN 'High Investment'
WHEN projected_12_month_ltv_per_customer > revenue_per_original_customer * 1.4 THEN 'Moderate Investment'
WHEN predicted_next_period_retention < retention_rate * 0.8 THEN 'Retention Focus'
WHEN retention_rate < 15 AND months_since_first_order > 6 THEN 'Minimal Investment'
ELSE 'Standard Care'
END as investment_recommendation
FROM cohort_predictive_analysis cpa
)
SELECT
cohort_month,
months_since_first_order,
active_customers,
ROUND(retention_rate, 2) as retention_rate_pct,
ROUND(revenue_per_original_customer, 2) as revenue_per_customer,
ROUND(avg_order_value, 2) as avg_order_value,
ROUND(cohort_health_score, 1) as health_score,
ROUND(predicted_next_period_retention, 2) as predicted_retention,
ROUND(projected_12_month_ltv_per_customer, 2) as projected_ltv,
cohort_classification,
investment_recommendation,
-- Additional insights for decision making
CASE
WHEN retention_change_mom > 5 THEN 'Improving'
WHEN retention_change_mom < -5 THEN 'Declining'
ELSE 'Stable'
END as trend_direction,
ROUND(benchmark_retention_rate, 2) as industry_benchmark_retention
FROM cohort_quality_assessment
WHERE months_since_first_order <= 18 -- 18-month analysis window
ORDER BY cohort_month DESC, months_since_first_order;
-- Advanced customer journey analysis with JSON aggregation
WITH customer_journey_events AS (
SELECT
customer_id,
event_date,
event_type,
event_value,
product_category,
-- Build comprehensive journey timeline
JSON_BUILD_OBJECT(
'timestamp', event_date,
'type', event_type,
'value', event_value,
'category', product_category,
'sequence', ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY event_date)
) as event_json,
-- Advanced array aggregations
ARRAY_AGG(product_category) OVER (
PARTITION BY customer_id
ORDER BY event_date
ROWS BETWEEN 4 PRECEDING AND CURRENT ROW
) as recent_categories,
-- Build behavioral pattern strings
STRING_AGG(
event_type, '->'
) OVER (
PARTITION BY customer_id
ORDER BY event_date
ROWS BETWEEN 3 PRECEDING AND CURRENT ROW
) as behavior_pattern
FROM customer_events
WHERE event_date >= CURRENT_DATE - INTERVAL '1 year'
),
journey_analytics AS (
SELECT
customer_id,
event_date,
event_type,
event_value,
-- Aggregate complete journey as JSON array
JSON_AGG(event_json) OVER (
PARTITION BY customer_id
ORDER BY event_date
ROWS UNBOUNDED PRECEDING
) as complete_journey,
-- Analyze category diversity
CARDINALITY(recent_categories) as categories_in_window,
CARDINALITY(
ARRAY(SELECT DISTINCT unnest(recent_categories))
) as unique_categories_in_window,
-- Pattern frequency analysis
COUNT(*) OVER (
PARTITION BY behavior_pattern
) as pattern_frequency,
-- Calculate journey momentum using JSON path operations
CASE
WHEN JSON_ARRAY_LENGTH(
JSON_AGG(event_json) OVER (
PARTITION BY customer_id
ORDER BY event_date
ROWS UNBOUNDED PRECEDING
)
) >= 2 THEN
(event_value -
(JSON_AGG(event_json) OVER (
PARTITION BY customer_id
ORDER BY event_date
ROWS UNBOUNDED PRECEDING
) -> -2 ->> 'value')::NUMERIC) /
NULLIF((JSON_AGG(event_json) OVER (
PARTITION BY customer_id
ORDER BY event_date
ROWS UNBOUNDED PRECEDING
) -> -2 ->> 'value')::NUMERIC, 0) * 100
END as value_momentum_pct
FROM customer_journey_events
),
customer_behavior_classification AS (
SELECT
customer_id,
behavior_pattern,
pattern_frequency,
unique_categories_in_window,
value_momentum_pct,
-- Advanced behavioral classification
CASE
WHEN pattern_frequency >= 100 AND unique_categories_in_window >= 4 THEN 'Power Explorer'
WHEN pattern_frequency >= 50 AND unique_categories_in_window >= 3 THEN 'Active Explorer'
WHEN pattern_frequency >= 100 AND unique_categories_in_window <= 2 THEN 'Category Specialist'
WHEN value_momentum_pct > 25 THEN 'High Growth Customer'
WHEN value_momentum_pct < -25 THEN 'Declining Customer'
WHEN behavior_pattern LIKE '%purchase->%purchase%' THEN 'Repeat Buyer'
ELSE 'Casual Customer'
END as behavior_type,
-- Journey complexity score
(unique_categories_in_window * 10 +
LEAST(pattern_frequency / 10, 50) +
CASE WHEN value_momentum_pct > 0 THEN 20 ELSE 0 END) as complexity_score,
-- Extract key journey insights using JSON operations
(complete_journey -> 0 ->> 'timestamp')::DATE as first_interaction,
(complete_journey -> -1 ->> 'timestamp')::DATE as latest_interaction,
(complete_journey -> 0 ->> 'value')::NUMERIC as first_value,
(complete_journey -> -1 ->> 'value')::NUMERIC as latest_value
FROM journey_analytics
WHERE event_date >= CURRENT_DATE - INTERVAL '30 days' -- Focus on recent activity
)
SELECT
behavior_type,
COUNT(*) as customer_count,
ROUND(AVG(complexity_score), 1) as avg_complexity_score,
ROUND(AVG(value_momentum_pct), 2) as avg_value_momentum,
ROUND(AVG(pattern_frequency), 0) as avg_pattern_frequency,
ROUND(AVG(unique_categories_in_window), 1) as avg_category_diversity,
-- Business insights
CASE
WHEN AVG(complexity_score) > 60 THEN 'High Engagement Segment'
WHEN AVG(complexity_score) > 40 THEN 'Medium Engagement Segment'
ELSE 'Low Engagement Segment'
END as engagement_level,
-- Strategic recommendations
CASE
WHEN behavior_type = 'Power Explorer' THEN 'VIP Treatment + Cross-sell'
WHEN behavior_type = 'High Growth Customer' THEN 'Nurture + Upsell'
WHEN behavior_type = 'Declining Customer' THEN 'Win-back Campaign'
WHEN behavior_type = 'Category Specialist' THEN 'Category-focused Marketing'
ELSE 'Standard Marketing'
END as recommended_strategy
FROM customer_behavior_classification
GROUP BY behavior_type
ORDER BY avg_complexity_score DESC;
-- Comprehensive financial analysis with advanced Oracle analytics
WITH financial_time_series AS (
SELECT
trading_date,
stock_symbol,
closing_price,
volume,
-- Advanced statistical measures
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY closing_price) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
RANGE BETWEEN INTERVAL '30' DAY PRECEDING AND CURRENT ROW
) as median_30_day,
STATS_MODE(ROUND(closing_price, 0)) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as mode_30_day,
-- Advanced correlation analysis
CORR(closing_price, volume) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as price_volume_correlation,
-- Statistical hypothesis testing
STATS_T_TEST_ONE(closing_price,
AVG(closing_price) OVER (PARTITION BY stock_symbol)
) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as t_test_vs_mean,
-- Regression analysis
REGR_SLOPE(closing_price, TO_NUMBER(trading_date - DATE '2000-01-01')) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as price_trend_slope,
REGR_R2(closing_price, TO_NUMBER(trading_date - DATE '2000-01-01')) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as trend_r_squared,
-- Advanced volatility measures
STATS_WSR_TEST(
closing_price - LAG(closing_price) OVER (PARTITION BY stock_symbol ORDER BY trading_date),
0
) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as wilcoxon_test_returns
FROM stock_prices
WHERE trading_date >= SYSDATE - 365
),
technical_indicators AS (
SELECT
fts.*,
-- Bollinger Bands with statistical confidence
median_30_day + (2 * STDDEV(closing_price) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
)) as bollinger_upper,
median_30_day - (2 * STDDEV(closing_price) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
)) as bollinger_lower,
-- RSI with statistical backing
CASE
WHEN LAG(closing_price) OVER (PARTITION BY stock_symbol ORDER BY trading_date) IS NOT NULL THEN
100 - (100 / (1 +
AVG(CASE WHEN closing_price > LAG(closing_price) OVER (PARTITION BY stock_symbol ORDER BY trading_date)
THEN closing_price - LAG(closing_price) OVER (PARTITION BY stock_symbol ORDER BY trading_date)
ELSE 0 END) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 13 PRECEDING AND CURRENT ROW
) /
NULLIF(AVG(CASE WHEN closing_price < LAG(closing_price) OVER (PARTITION BY stock_symbol ORDER BY trading_date)
THEN LAG(closing_price) OVER (PARTITION BY stock_symbol ORDER BY trading_date) - closing_price
ELSE 0 END) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 13 PRECEDING AND CURRENT ROW
), 0)
))
END as rsi_14_day,
-- Statistical significance of trends
CASE
WHEN trend_r_squared > 0.7 AND ABS(price_trend_slope) > 0.1 THEN 'Strong Trend'
WHEN trend_r_squared > 0.4 AND ABS(price_trend_slope) > 0.05 THEN 'Moderate Trend'
WHEN trend_r_squared < 0.2 THEN 'No Trend'
ELSE 'Weak Trend'
END as trend_significance
FROM financial_time_series fts
),
risk_assessment AS (
SELECT
ti.*,
-- Value at Risk (VaR) calculation
PERCENTILE_CONT(0.05) WITHIN GROUP (
ORDER BY (closing_price - LAG(closing_price) OVER (
PARTITION BY stock_symbol ORDER BY trading_date
)) / LAG(closing_price) OVER (PARTITION BY stock_symbol ORDER BY trading_date) * 100
) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as var_5_percent,
-- Expected Shortfall (Conditional VaR)
AVG(
CASE WHEN (closing_price - LAG(closing_price) OVER (
PARTITION BY stock_symbol ORDER BY trading_date
)) / LAG(closing_price) OVER (PARTITION BY stock_symbol ORDER BY trading_date) * 100
<= PERCENTILE_CONT(0.05) WITHIN GROUP (
ORDER BY (closing_price - LAG(closing_price) OVER (
PARTITION BY stock_symbol ORDER BY trading_date
)) / LAG(closing_price) OVER (PARTITION BY stock_symbol ORDER BY trading_date) * 100
) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
)
THEN (closing_price - LAG(closing_price) OVER (
PARTITION BY stock_symbol ORDER BY trading_date
)) / LAG(closing_price) OVER (PARTITION BY stock_symbol ORDER BY trading_date) * 100
END
) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as expected_shortfall,
-- Sharpe ratio approximation
AVG((closing_price - LAG(closing_price) OVER (
PARTITION BY stock_symbol ORDER BY trading_date
)) / LAG(closing_price) OVER (PARTITION BY stock_symbol ORDER BY trading_date) * 100) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) / NULLIF(STDDEV((closing_price - LAG(closing_price) OVER (
PARTITION BY stock_symbol ORDER BY trading_date
)) / LAG(closing_price) OVER (PARTITION BY stock_symbol ORDER BY trading_date) * 100) OVER (
PARTITION BY stock_symbol
ORDER BY trading_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
), 0) as sharpe_ratio_30_day
FROM technical_indicators ti
)
SELECT
stock_symbol,
trading_date,
closing_price,
ROUND(median_30_day, 2) as median_price_30d,
ROUND(bollinger_upper, 2) as bollinger_upper,
ROUND(bollinger_lower, 2) as bollinger_lower,
ROUND(rsi_14_day, 2) as rsi_14,
trend_significance,
ROUND(trend_r_squared, 3) as trend_reliability,
ROUND(var_5_percent, 3) as daily_var_5pct,
ROUND(expected_shortfall, 3) as expected_shortfall_5pct,
ROUND(sharpe_ratio_30_day, 3) as sharpe_ratio_30d,
-- Trading signals based on multiple indicators
CASE
WHEN closing_price > bollinger_upper AND rsi_14_day > 70 THEN 'Strong Sell Signal'
WHEN closing_price < bollinger_lower AND rsi_14_day < 30 THEN 'Strong Buy Signal'
WHEN trend_significance = 'Strong Trend' AND price_trend_slope > 0 AND rsi_14_day < 60 THEN 'Trend Buy'
WHEN trend_significance = 'Strong Trend' AND price_trend_slope < 0 AND rsi_14_day > 40 THEN 'Trend Sell'
ELSE 'Hold'
END as trading_signal,
-- Risk classification
CASE
WHEN ABS(var_5_percent) > 5 OR ABS(expected_shortfall) > 7 THEN 'High Risk'
WHEN ABS(var_5_percent) > 3 OR ABS(expected_shortfall) > 4 THEN 'Medium Risk'
ELSE 'Low Risk'
END as risk_classification
FROM risk_assessment
WHERE trading_date >= SYSDATE - 60 -- Last 2 months
ORDER BY stock_symbol, trading_date DESC;
-- SQL Server: Optimized for billions of rows with partitioning
-- Table structure with optimal partitioning
CREATE TABLE sales_data_optimized (
sale_date DATE NOT NULL,
customer_id BIGINT NOT NULL,
product_id INT NOT NULL,
amount DECIMAL(12,2) NOT NULL,
region_id SMALLINT NOT NULL,
INDEX IX_sales_customer_date_amount NONCLUSTERED (customer_id, sale_date, amount),
INDEX IX_sales_date_region_covering NONCLUSTERED (sale_date, region_id)
INCLUDE (customer_id, product_id, amount)
) ON ps_sales_date(sale_date); -- Partitioned by date
-- Highly optimized window function query
WITH monthly_batch_processing AS (
-- Process data in monthly batches to reduce memory usage
SELECT
DATE_TRUNC(month, sale_date) as month,
customer_id,
product_id,
region_id,
SUM(amount) as monthly_amount,
COUNT(*) as monthly_transactions,
-- Pre-aggregate for better window function performance
NTILE(20) OVER (ORDER BY customer_id) as processing_batch
FROM sales_data_optimized
WHERE sale_date >= DATEADD(month, -24, GETDATE()) -- Enable partition elimination
GROUP BY DATE_TRUNC(month, sale_date), customer_id, product_id, region_id
),
optimized_window_calculations AS (
-- Window functions on pre-aggregated data perform much better
SELECT
mbp.*,
-- Customer lifetime metrics
SUM(monthly_amount) OVER (
PARTITION BY customer_id
ORDER BY month
ROWS UNBOUNDED PRECEDING
) as customer_lifetime_value,
-- Running averages with controlled frame sizes
AVG(monthly_amount) OVER (
PARTITION BY customer_id
ORDER BY month
ROWS BETWEEN 5 PRECEDING AND CURRENT ROW -- 6-month window
) as customer_6m_avg,
-- Regional benchmarking
PERCENT_RANK() OVER (
PARTITION BY month, region_id
ORDER BY monthly_amount
) as regional_percentile_rank,
-- Product performance tracking
RANK() OVER (
PARTITION BY month, product_id
ORDER BY monthly_amount DESC
) as monthly_product_rank,
-- Efficient batch processing indicator
ROW_NUMBER() OVER (
PARTITION BY processing_batch, month
ORDER BY customer_id
) as batch_sequence
FROM monthly_batch_processing mbp
),
final_analytics AS (
-- Additional calculations on already windowed data
SELECT
owc.*,
-- Growth calculations
monthly_amount - LAG(monthly_amount, 1) OVER (
PARTITION BY customer_id
ORDER BY month
) as month_over_month_growth,
-- Customer segmentation based on multiple factors
CASE
WHEN regional_percentile_rank >= 0.95 AND customer_6m_avg > 1000 THEN 'Premium'
WHEN regional_percentile_rank >= 0.80 AND customer_lifetime_value > 5000 THEN 'High Value'
WHEN regional_percentile_rank >= 0.60 THEN 'Standard'
ELSE 'Budget'
END as customer_segment,
-- Trend classification
CASE
WHEN month_over_month_growth > customer_6m_avg * 0.1 THEN 'High Growth'
WHEN month_over_month_growth > 0 THEN 'Growing'
WHEN month_over_month_growth > -customer_6m_avg * 0.1 THEN 'Stable'
ELSE 'Declining'
END as growth_trend
FROM optimized_window_calculations owc
)
-- Final output with business insights
SELECT
customer_segment,
growth_trend,
COUNT(*) as customer_month_records,
ROUND(AVG(customer_lifetime_value), 2) as avg_ltv,
ROUND(AVG(customer_6m_avg), 2) as avg_monthly_spend,
ROUND(AVG(month_over_month_growth), 2) as avg_mom_growth,
COUNT(DISTINCT customer_id) as unique_customers
FROM final_analytics
WHERE month >= DATEADD(month, -12, GETDATE()) -- Focus on recent year
GROUP BY customer_segment, growth_trend
ORDER BY avg_ltv DESC;
-- PostgreSQL: Handling massive datasets with streaming window functions
CREATE OR REPLACE FUNCTION process_large_customer_analytics(
p_start_date DATE,
p_end_date DATE,
p_batch_size INTEGER DEFAULT 10000
)
RETURNS TABLE (
customer_id BIGINT,
analysis_date DATE,
lifetime_value NUMERIC,
behavior_score NUMERIC,
risk_category TEXT
)
LANGUAGE plpgsql
AS $
DECLARE
batch_start BIGINT;
batch_end BIGINT;
total_customers BIGINT;
BEGIN
-- Get total customer count for batching
SELECT COUNT(DISTINCT o.customer_id) INTO total_customers
FROM orders o
WHERE o.order_date BETWEEN p_start_date AND p_end_date;
-- Process in batches to manage memory
FOR batch_start IN 0..total_customers BY p_batch_size LOOP
batch_end := LEAST(batch_start + p_batch_size - 1, total_customers);
RETURN QUERY
WITH customer_batch AS (
SELECT DISTINCT o.customer_id,
ROW_NUMBER() OVER (ORDER BY o.customer_id) - 1 as customer_rank
FROM orders o
WHERE o.order_date BETWEEN p_start_date AND p_end_date
),
filtered_customers AS (
SELECT customer_id
FROM customer_batch
WHERE customer_rank BETWEEN batch_start AND batch_end
),
customer_metrics AS (
SELECT
o.customer_id,
p_end_date as analysis_date,
-- Optimized window functions with limited scope
SUM(o.amount) OVER (
PARTITION BY o.customer_id
ORDER BY o.order_date
ROWS UNBOUNDED PRECEDING
) as running_total,
-- Behavior scoring with controlled windows
(COUNT(*) OVER (PARTITION BY o.customer_id) * 10 +
AVG(o.amount) OVER (PARTITION BY o.customer_id) / 10 +
CASE WHEN MAX(o.order_date) OVER (PARTITION BY o.customer_id) >
CURRENT_DATE - INTERVAL '90 days' THEN 50 ELSE 0 END
) as behavior_raw_score,
-- Risk indicators
STDDEV(o.amount) OVER (PARTITION BY o.customer_id) /
NULLIF(AVG(o.amount) OVER (PARTITION BY o.customer_id), 0) as volatility_ratio
FROM orders o
INNER JOIN filtered_customers fc ON o.customer_id = fc.customer_id
WHERE o.order_date BETWEEN p_start_date AND p_end_date
)
SELECT DISTINCT
cm.customer_id,
cm.analysis_date,
ROUND(cm.running_total, 2) as lifetime_value,
ROUND(cm.behavior_raw_score, 2) as behavior_score,
CASE
WHEN cm.volatility_ratio > 2.0 THEN 'High Risk'
WHEN cm.volatility_ratio > 1.0 THEN 'Medium Risk'
ELSE 'Low Risk'
END as risk_category
FROM customer_metrics cm;
END LOOP;
END;
$;
-- Usage example for processing millions of customers efficiently
SELECT * FROM process_large_customer_analytics(
'2023-01-01'::DATE,
'2024-12-31'::DATE,
5000 -- Process 5000 customers at a time
)
ORDER BY lifetime_value DESC
LIMIT 1000;
-- MySQL: Real-time customer behavior scoring
CREATE VIEW real_time_customer_scores AS
WITH recent_activity AS (
SELECT
customer_id,
activity_timestamp,
activity_type,
activity_value,
-- Real-time scoring based on recent activity
ROW_NUMBER() OVER (
PARTITION BY customer_id
ORDER BY activity_timestamp DESC
) as recency_rank,
-- Activity velocity (events per hour)
COUNT(*) OVER (
PARTITION BY customer_id
ORDER BY activity_timestamp
RANGE BETWEEN INTERVAL 1 HOUR PRECEDING AND CURRENT ROW
) as hourly_activity_count,
-- Value momentum in last 24 hours
SUM(activity_value) OVER (
PARTITION BY customer_id
ORDER BY activity_timestamp
RANGE BETWEEN INTERVAL 1 DAY PRECEDING AND CURRENT ROW
) as daily_value_total
FROM customer_activities
WHERE activity_timestamp >= NOW() - INTERVAL 7 DAY
),
engagement_scoring AS (
SELECT
customer_id,
-- Multi-dimensional engagement score
(CASE WHEN recency_rank = 1 AND activity_timestamp >= NOW() - INTERVAL 1 HOUR THEN 50 ELSE 0 END +
LEAST(hourly_activity_count * 5, 25) + -- Up to 25 points for activity frequency
LEAST(daily_value_total / 10, 25) -- Up to 25 points for value
) as engagement_score,
-- Risk indicators
CASE
WHEN MAX(activity_timestamp) < NOW() - INTERVAL 3 DAY THEN 'Inactive'
WHEN hourly_activity_count > 20 THEN 'Hyperactive'
WHEN daily_value_total > 1000 THEN 'High Value Activity'
ELSE 'Normal'
END as activity_pattern,
-- Personalization signals
CASE
WHEN engagement_score >= 75 THEN 'Immediate Engagement'
WHEN engagement_score >= 50 THEN 'Active Engagement'
WHEN engagement_score >= 25 THEN 'Moderate Engagement'
ELSE 'Low Engagement'
END as engagement_tier
FROM recent_activity
GROUP BY customer_id
)
SELECT
es.customer_id,
es.engagement_score,
es.activity_pattern,
es.engagement_tier,
NOW() as score_timestamp,
-- Action recommendations
CASE
WHEN es.engagement_tier = 'Immediate Engagement' THEN 'Push personalized offers'
WHEN es.engagement_tier = 'Active Engagement' THEN 'Show recommended products'
WHEN es.activity_pattern = 'Inactive' THEN 'Send re-engagement email'
WHEN es.activity_pattern = 'Hyperactive' THEN 'Provide premium support'
ELSE 'Standard experience'
END as recommended_action
FROM engagement_scoring es;
-- Data quality anomaly detection across multiple dimensions
WITH data_quality_metrics AS (
SELECT
table_name,
column_name,
check_timestamp,
null_count,
total_rows,
duplicate_count,
-- Quality trend analysis
null_count * 100.0 / total_rows as null_percentage,
duplicate_count * 100.0 / total_rows as duplicate_percentage,
-- Historical comparison
LAG(null_count * 100.0 / total_rows, 1) OVER (
PARTITION BY table_name, column_name
ORDER BY check_timestamp
) as prev_null_percentage,
-- Moving baseline for anomaly detection
AVG(null_count * 100.0 / total_rows) OVER (
PARTITION BY table_name, column_name
ORDER BY check_timestamp
ROWS BETWEEN 6 PRECEDING AND 1 PRECEDING -- Exclude current day
) as baseline_null_percentage,
STDDEV(null_count * 100.0 / total_rows) OVER (
PARTITION BY table_name, column_name
ORDER BY check_timestamp
ROWS BETWEEN 6 PRECEDING AND 1 PRECEDING
) as null_percentage_stddev
FROM data_quality_checks
WHERE check_timestamp >= CURRENT_DATE - INTERVAL '30 days'
),
quality_anomaly_detection AS (
SELECT
dqm.*,
-- Anomaly scoring
ABS(null_percentage - baseline_null_percentage) /
NULLIF(null_percentage_stddev, 0) as null_z_score,
-- Quality degradation detection
null_percentage - prev_null_percentage as null_percentage_change,
-- Quality classification
CASE
WHEN null_percentage > baseline_null_percentage + (2 * null_percentage_stddev) THEN 'Quality Alert'
WHEN null_percentage > baseline_null_percentage + null_percentage_stddev THEN 'Quality Warning'
WHEN null_percentage < baseline_null_percentage - null_percentage_stddev THEN 'Quality Improvement'
ELSE 'Normal'
END as quality_status,
-- Trend analysis
CASE
WHEN null_percentage_change > 2 THEN 'Degrading'
WHEN null_percentage_change < -2 THEN 'Improving'
ELSE 'Stable'
END as quality_trend
FROM data_quality_metrics dqm
WHERE baseline_null_percentage IS NOT NULL
)
SELECT
table_name,
column_name,
check_timestamp,
ROUND(null_percentage, 2) as current_null_pct,
ROUND(baseline_null_percentage, 2) as baseline_null_pct,
ROUND(null_z_score, 2) as anomaly_score,
quality_status,
quality_trend,
-- Action recommendations
CASE
WHEN quality_status = 'Quality Alert' AND quality_trend = 'Degrading' THEN 'IMMEDIATE: Investigate data pipeline'
WHEN quality_status = 'Quality Alert' THEN 'HIGH: Review data source changes'
WHEN quality_status = 'Quality Warning' AND null_z_score > 1.5 THEN 'MEDIUM: Monitor closely'
WHEN quality_status = 'Quality Improvement' THEN 'INFO: Quality enhancement detected'
ELSE 'NORMAL: Continue monitoring'
END as recommended_action
FROM quality_anomaly_detection
WHERE check_timestamp >= CURRENT_DATE - INTERVAL '7 days'
ORDER BY null_z_score DESC NULLS LAST, check_timestamp DESC;
-- Advanced RFM analysis with predictive scoring
WITH customer_rfm_base AS (
SELECT
customer_id,
MAX(order_date) as last_order_date,
COUNT(*) as frequency,
SUM(amount) as monetary_value,
AVG(amount) as avg_order_value,
-- Advanced recency calculation
CURRENT_DATE - MAX(order_date) as days_since_last_order,
-- Customer lifecycle position
COUNT(*) * 365.0 / (MAX(order_date) - MIN(order_date) + 1) as annual_frequency,
-- Trend analysis
REGR_SLOPE(amount, EXTRACT(EPOCH FROM order_date)) as spending_trend,
-- Seasonality indicators
MODE() WITHIN GROUP (ORDER BY EXTRACT(month FROM order_date)) as preferred_month,
MODE() WITHIN GROUP (ORDER BY EXTRACT(dow FROM order_date)) as preferred_dow
FROM orders
WHERE order_date >= CURRENT_DATE - INTERVAL '2 years'
GROUP BY customer_id
),
rfm_scoring AS (
SELECT
crb.*,
-- Advanced RFM scoring with statistical distribution
CASE
WHEN days_since_last_order <= 30 THEN 5
WHEN days_since_last_order <= 60 THEN 4
WHEN days_since_last_order <= 120 THEN 3
WHEN days_since_last_order <= 180 THEN 2
ELSE 1
END as recency_score,
NTILE(5) OVER (ORDER BY frequency DESC) as frequency_score,
NTILE(5) OVER (ORDER BY monetary_value DESC) as monetary_score,
-- Additional behavioral scores
NTILE(5) OVER (ORDER BY avg_order_value DESC) as aov_score,
CASE
WHEN spending_trend > 0.1 THEN 5
WHEN spending_trend > 0.05 THEN 4
WHEN spending_trend > -0.05 THEN 3
WHEN spending_trend > -0.1 THEN 2
ELSE 1
END as trend_score
FROM customer_rfm_base crb
),
advanced_segmentation AS (
SELECT
rs.*,
-- Composite RFM score
recency_score + frequency_score + monetary_score as rfm_total_score,
-- Multi-dimensional classification
CASE
WHEN recency_score >= 4 AND frequency_score >= 4 AND monetary_score >= 4 THEN 'Champions'
WHEN recency_score >= 3 AND frequency_score >= 3 AND monetary_score >= 4 THEN 'Loyal Customers'
WHEN recency_score >= 4 AND frequency_score <= 2 AND monetary_score >= 3 THEN 'Potential Loyalists'
WHEN recency_score >= 4 AND frequency_score <= 2 AND monetary_score <= 2 THEN 'New Customers'
WHEN recency_score >= 3 AND frequency_score >= 3 AND monetary_score <= 3 THEN 'Promising'
WHEN recency_score <= 2 AND frequency_score >= 3 AND monetary_score >= 3 THEN 'At Risk'
WHEN recency_score <= 2 AND frequency_score <= 2 AND monetary_score >= 4 THEN 'Cannot Lose Them'
WHEN recency_score <= 2 AND frequency_score <= 2 AND monetary_score <= 2 THEN 'Hibernating'
ELSE 'Others'
END as rfm_segment,
-- Behavioral pattern classification
CASE
WHEN trend_score >= 4 AND aov_score >= 4 THEN 'High Growth High Value'
WHEN trend_score >= 4 THEN 'High Growth'
WHEN aov_score >= 4 AND frequency_score >= 4 THEN 'High Value Frequent'
WHEN frequency_score >= 4 THEN 'High Frequency'
WHEN aov_score >= 4 THEN 'High Value'
ELSE 'Standard'
END as behavior_pattern,
-- Customer lifetime value prediction
CASE
WHEN rfm_total_score >= 12 THEN monetary_value * 1.5 + (annual_frequency * avg_order_value * 0.8)
WHEN rfm_total_score >= 9 THEN monetary_value * 1.2 + (annual_frequency * avg_order_value * 0.6)
WHEN rfm_total_score >= 6 THEN monetary_value * 1.0 + (annual_frequency * avg_order_value * 0.4)
ELSE monetary_value * 0.8
END as predicted_ltv_1_year
FROM rfm_scoring rs
)
SELECT
rfm_segment,
behavior_pattern,
COUNT(*) as customer_count,
ROUND(AVG(monetary_value), 2) as avg_historical_value,
ROUND(AVG(predicted_ltv_1_year), 2) as avg_predicted_ltv,
ROUND(AVG(frequency), 1) as avg_frequency,
ROUND(AVG(days_since_last_order), 1) as avg_days_since_last_order,
-- Strategic recommendations
CASE
WHEN rfm_segment = 'Champions' THEN 'Reward loyalty, upsell premium products'
WHEN rfm_segment = 'At Risk' THEN 'Win-back campaigns, special offers'
WHEN rfm_segment = 'Cannot Lose Them' THEN 'Personalized retention offers'
WHEN rfm_segment = 'New Customers' THEN 'Onboarding sequence, education'
WHEN rfm_segment = 'Potential Loyalists' THEN 'Membership programs, engagement'
ELSE 'Standard marketing approach'
END as marketing_strategy,
-- Investment priority
ROUND(
AVG(predicted_ltv_1_year) / 100.0 *
COUNT(*) / (SELECT COUNT(*) FROM advanced_segmentation) * 1000, 2
) as segment_investment_score
FROM advanced_segmentation
GROUP BY rfm_segment, behavior_pattern
ORDER BY avg_predicted_ltv DESC;
You've now seen window functions at their most powerful - solving complex business problems that would be nearly impossible with traditional SQL. These advanced patterns demonstrate:
Advanced window functions transform SQL from a simple query language into a complete analytical programming environment. You can now:
While window functions are incredibly powerful, recognize when to complement them with:
Master these patterns by:
Window functions represent the pinnacle of SQL analytical capability. With these advanced patterns, you can solve sophisticated problems that previously required specialized tools or complex application code. The key is knowing when and how to apply these techniques to create maximum business value.