江门市护送信息网

SQL语句实现用户连续登录最长天数

2026-04-09 07:12:02 浏览次数:0
详细信息

方法1:使用ROW_NUMBER()和DATE_SUB(推荐)

-- 以用户id分组,按登录日期排序
WITH ranked_logs AS (
    SELECT 
        user_id,
        login_date,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) AS rn
    FROM login_records
),
-- 计算日期与序号的差值,连续日期差值相同
date_groups AS (
    SELECT 
        user_id,
        login_date,
        DATE_SUB(login_date, INTERVAL rn DAY) AS date_group
    FROM ranked_logs
    -- 如果需要去重,可以添加DISTINCT或先GROUP BY
    GROUP BY user_id, login_date
),
-- 按用户和差值分组,统计连续天数
consecutive_counts AS (
    SELECT 
        user_id,
        date_group,
        COUNT(*) AS consecutive_days,
        MIN(login_date) AS start_date,
        MAX(login_date) AS end_date
    FROM date_groups
    GROUP BY user_id, date_group
)
-- 找出每个用户的最长连续登录天数
SELECT 
    user_id,
    MAX(consecutive_days) AS max_consecutive_days
FROM consecutive_counts
GROUP BY user_id;

方法2:使用LAG函数判断连续性

WITH daily_login AS (
    -- 确保每天只有一条记录(去重)
    SELECT DISTINCT user_id, login_date
    FROM login_records
),
lagged_dates AS (
    SELECT 
        user_id,
        login_date,
        LAG(login_date) OVER (PARTITION BY user_id ORDER BY login_date) AS prev_date
    FROM daily_login
),
-- 标记连续性的开始
streak_starts AS (
    SELECT 
        user_id,
        login_date,
        CASE 
            -- 第一条记录或间隔超过1天,是新连续段的开始
            WHEN prev_date IS NULL OR DATEDIFF(login_date, prev_date) > 1 
            THEN 1 
            ELSE 0 
        END AS is_streak_start
    FROM lagged_dates
),
-- 为每个连续段分配组号
streak_groups AS (
    SELECT 
        user_id,
        login_date,
        SUM(is_streak_start) OVER (PARTITION BY user_id ORDER BY login_date) AS streak_id
    FROM streak_starts
)
-- 统计最长连续天数
SELECT 
    user_id,
    MAX(streak_length) AS max_consecutive_days
FROM (
    SELECT 
        user_id,
        streak_id,
        COUNT(*) AS streak_length
    FROM streak_groups
    GROUP BY user_id, streak_id
) AS streak_counts
GROUP BY user_id;

方法3:使用递归CTE(适用于数据库支持)

WITH RECURSIVE consecutive_streaks AS (
    -- 基础查询:每个用户的每次登录作为连续段的起点
    SELECT 
        user_id,
        login_date,
        login_date AS current_date,
        1 AS consecutive_days
    FROM login_records

    UNION ALL

    -- 递归查询:查找下一天的登录
    SELECT 
        cs.user_id,
        cs.login_date,
        DATE_ADD(cs.current_date, INTERVAL 1 DAY),
        cs.consecutive_days + 1
    FROM consecutive_streaks cs
    INNER JOIN login_records lr 
        ON cs.user_id = lr.user_id 
        AND lr.login_date = DATE_ADD(cs.current_date, INTERVAL 1 DAY)
)
-- 找出每个起点的最大连续天数,然后取最大值
SELECT 
    user_id,
    MAX(consecutive_days) AS max_consecutive_days
FROM consecutive_streaks
GROUP BY user_id;

方法4:简单的自连接方式

WITH daily_login AS (
    SELECT DISTINCT user_id, login_date
    FROM login_records
)
SELECT 
    dl1.user_id,
    MAX(DATEDIFF(dl2.login_date, dl1.login_date) + 1) AS max_consecutive_days
FROM daily_login dl1
-- 连接后续所有可能的结束日期
JOIN daily_login dl2 
    ON dl1.user_id = dl2.user_id 
    AND dl2.login_date >= dl1.login_date
-- 确保日期范围内每一天都有登录
WHERE NOT EXISTS (
    SELECT 1
    FROM daily_login dl3
    WHERE dl3.user_id = dl1.user_id
        AND dl3.login_date BETWEEN dl1.login_date AND dl2.login_date
        AND NOT EXISTS (
            SELECT 1
            FROM daily_login dl4
            WHERE dl4.user_id = dl1.user_id
                AND dl4.login_date = dl3.login_date
        )
)
GROUP BY dl1.user_id;

完整的示例表结构和数据

-- 创建示例表
CREATE TABLE login_records (
    id INT PRIMARY KEY AUTO_INCREMENT,
    user_id INT NOT NULL,
    login_date DATE NOT NULL,
    INDEX idx_user_date (user_id, login_date)
);

-- 插入示例数据
INSERT INTO login_records (user_id, login_date) VALUES
(1, '2024-01-01'),
(1, '2024-01-02'),
(1, '2024-01-03'),
(1, '2024-01-05'),  -- 中断
(1, '2024-01-06'),
(1, '2024-01-07'),
(2, '2024-01-01'),
(2, '2024-01-02'),
(2, '2024-01-03'),
(2, '2024-01-04'),
(2, '2024-01-05'),
(2, '2024-01-10'); -- 中断

最佳实践建议

方法1(ROW_NUMBER + DATE_SUB)通常是最优选择,因为:

性能优化

-- 创建合适索引
CREATE INDEX idx_login_user_date ON login_records(user_id, login_date);

-- 提前去重(如果每天可能有多次登录)
WITH daily_login AS (
    SELECT DISTINCT user_id, DATE(login_time) AS login_date
    FROM login_records
)
-- 然后使用上述方法...

考虑边界情况

选择哪种方法取决于你的数据库类型、数据量大小和具体需求。

相关推荐