方法1:使用ROW_NUMBER()和DATE_SUB(推荐)
-- 以用户id分组,按登录日期排序
WITH ranked_logs AS (
SELECT
user_id,
login_date,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) AS rn
FROM login_records
),
-- 计算日期与序号的差值,连续日期差值相同
date_groups AS (
SELECT
user_id,
login_date,
DATE_SUB(login_date, INTERVAL rn DAY) AS date_group
FROM ranked_logs
-- 如果需要去重,可以添加DISTINCT或先GROUP BY
GROUP BY user_id, login_date
),
-- 按用户和差值分组,统计连续天数
consecutive_counts AS (
SELECT
user_id,
date_group,
COUNT(*) AS consecutive_days,
MIN(login_date) AS start_date,
MAX(login_date) AS end_date
FROM date_groups
GROUP BY user_id, date_group
)
-- 找出每个用户的最长连续登录天数
SELECT
user_id,
MAX(consecutive_days) AS max_consecutive_days
FROM consecutive_counts
GROUP BY user_id;
方法2:使用LAG函数判断连续性
WITH daily_login AS (
-- 确保每天只有一条记录(去重)
SELECT DISTINCT user_id, login_date
FROM login_records
),
lagged_dates AS (
SELECT
user_id,
login_date,
LAG(login_date) OVER (PARTITION BY user_id ORDER BY login_date) AS prev_date
FROM daily_login
),
-- 标记连续性的开始
streak_starts AS (
SELECT
user_id,
login_date,
CASE
-- 第一条记录或间隔超过1天,是新连续段的开始
WHEN prev_date IS NULL OR DATEDIFF(login_date, prev_date) > 1
THEN 1
ELSE 0
END AS is_streak_start
FROM lagged_dates
),
-- 为每个连续段分配组号
streak_groups AS (
SELECT
user_id,
login_date,
SUM(is_streak_start) OVER (PARTITION BY user_id ORDER BY login_date) AS streak_id
FROM streak_starts
)
-- 统计最长连续天数
SELECT
user_id,
MAX(streak_length) AS max_consecutive_days
FROM (
SELECT
user_id,
streak_id,
COUNT(*) AS streak_length
FROM streak_groups
GROUP BY user_id, streak_id
) AS streak_counts
GROUP BY user_id;
方法3:使用递归CTE(适用于数据库支持)
WITH RECURSIVE consecutive_streaks AS (
-- 基础查询:每个用户的每次登录作为连续段的起点
SELECT
user_id,
login_date,
login_date AS current_date,
1 AS consecutive_days
FROM login_records
UNION ALL
-- 递归查询:查找下一天的登录
SELECT
cs.user_id,
cs.login_date,
DATE_ADD(cs.current_date, INTERVAL 1 DAY),
cs.consecutive_days + 1
FROM consecutive_streaks cs
INNER JOIN login_records lr
ON cs.user_id = lr.user_id
AND lr.login_date = DATE_ADD(cs.current_date, INTERVAL 1 DAY)
)
-- 找出每个起点的最大连续天数,然后取最大值
SELECT
user_id,
MAX(consecutive_days) AS max_consecutive_days
FROM consecutive_streaks
GROUP BY user_id;
方法4:简单的自连接方式
WITH daily_login AS (
SELECT DISTINCT user_id, login_date
FROM login_records
)
SELECT
dl1.user_id,
MAX(DATEDIFF(dl2.login_date, dl1.login_date) + 1) AS max_consecutive_days
FROM daily_login dl1
-- 连接后续所有可能的结束日期
JOIN daily_login dl2
ON dl1.user_id = dl2.user_id
AND dl2.login_date >= dl1.login_date
-- 确保日期范围内每一天都有登录
WHERE NOT EXISTS (
SELECT 1
FROM daily_login dl3
WHERE dl3.user_id = dl1.user_id
AND dl3.login_date BETWEEN dl1.login_date AND dl2.login_date
AND NOT EXISTS (
SELECT 1
FROM daily_login dl4
WHERE dl4.user_id = dl1.user_id
AND dl4.login_date = dl3.login_date
)
)
GROUP BY dl1.user_id;
完整的示例表结构和数据
-- 创建示例表
CREATE TABLE login_records (
id INT PRIMARY KEY AUTO_INCREMENT,
user_id INT NOT NULL,
login_date DATE NOT NULL,
INDEX idx_user_date (user_id, login_date)
);
-- 插入示例数据
INSERT INTO login_records (user_id, login_date) VALUES
(1, '2024-01-01'),
(1, '2024-01-02'),
(1, '2024-01-03'),
(1, '2024-01-05'), -- 中断
(1, '2024-01-06'),
(1, '2024-01-07'),
(2, '2024-01-01'),
(2, '2024-01-02'),
(2, '2024-01-03'),
(2, '2024-01-04'),
(2, '2024-01-05'),
(2, '2024-01-10'); -- 中断
最佳实践建议
方法1(ROW_NUMBER + DATE_SUB)通常是最优选择,因为:
- 逻辑清晰,易于理解
- 性能较好,只需扫描一次数据
- 兼容大部分数据库(MySQL、PostgreSQL、SQL Server等)
性能优化:
-- 创建合适索引
CREATE INDEX idx_login_user_date ON login_records(user_id, login_date);
-- 提前去重(如果每天可能有多次登录)
WITH daily_login AS (
SELECT DISTINCT user_id, DATE(login_time) AS login_date
FROM login_records
)
-- 然后使用上述方法...
考虑边界情况:
- 单日多次登录
- 数据缺失(周末不登录)
- 跨月/跨年登录
选择哪种方法取决于你的数据库类型、数据量大小和具体需求。