중요! Q19. 20년 7월 일별 고과금 유저 TOP3
SELECT
FROM
(
SELECT DATE_FORMAT(purchased_at - INTERVAL 9 HOUR, '%Y-%m-%d') AS d_date
, customer_id
, SUM(price)
, DENSE_RANK() OVER(PARTITION BY DATE_FORMAT(purchased_at - INTERVAL 9 HOUR, '%Y-%m-%d') ORDER BY SUM(price) DESC) AS rank_rev
FROM fastcampus.tbl_purchase
WHERE purchased_at >='2020-07-01'
AND purchased_at < '2020-08-01'
GROUP BY 1, 2
) AS t1
WHERE rank_rev < 4;
rank :12245
dense_rank : 12234
row_number : 12345
Product 분석심화(with함수) : 가입일, 최초구매일
Q 20년 7월 우리 신규 유저가 하루 안에 결제로 넘어가는 비율이 어떻게 되나요? 그 비율이 어떤지 알고싶고, 결제까지 보통 몇분 정도 소요?
- 신규유저의 가입일
- 최초 구매일
WITH rt_tbl AS (
SELECT A.customer_id
, A.created_at
, B.customer_id AS paying_user
, B.purchased_at
, TIME_TO_SEC(TIMEDIFF(A.created_at, B.purchased_at))/3600 AS diff_hour
FROM fastcampus.tbl_customer AS A
LEFT JOIN
(SELECT customer_id
, MIN(purchased_at) AS purchased_at
FROM fastcampus.tbl_purchase
) AS B
ON A.customer_id = B.customer_id
AND B.purchased_at < A.created_at + INTERVAL 1 DAY
WHERE A.created_at >='2020-07-01'
AND A.created_at<'2020-08-01'
)
SELECT ROUND(COUNT(paying_user)/COUNT(customer_id)*100, 2)
, AVG(diff_hour)
FROM rt_tbl
--내가 쓴 답
SELECT ROUND(COUNT(t2.customer)/COUNT(t1.customer)*100, 2) AS '구매비율'
AVG(TIMEDIFF(t1.created_at, t2.purchased-at) )AS '결제까지소요시간',
FROM(
SELECT t1.customer_id, t2.customer_id
, t1.created_at
, MIN(t2.purchased_at)
FROM fastcampus.tbl_customer AS t1
LEFT JOIN fastcampus.tbl_purchased AS t2
ON t1.customer_id = t2.customer_id
WHERE t1.created_at >='2020-07-01'
AND t1.created_at<'2020-08-01'
GROUP BY 1,2
)
WHERE DAY(t1.created_at)=DAY(t2.purchased_at);
중요!!Q20. 리텐션 : 7월기준 Day 1 retention 구하고
Daily 로 리텐션 구해서 추세 보기
- Retention : 시간이 지날수록 얼마나 많은 유저가 제품이나 서비스로 다시 돌아오는지
- N-day Retention : n=1,2,3,...,30
SELECT DATE_FORMAT(A.visited_at - interval 9 HOUR, %Y-%m-%d) AS d_date
, COUNT(DISTINCT A.customer_id) AS active_user
, COUNT(DISTINCT B.customer_id)AS retained_user
, COUNT(DISTINCT B.customer_id)/COUNT(DISTINCT A.customer_id) AS retention
FROM fastcampus.tbl_visit AS A
LEFT JOIN fastcampus.tbl_visit AS B
ON A.customer_id=B.customer_id
AND DATE_FORMAT(A.visited_at - interval 9 HOUR, %Y-%m-%d) = DATE_FORMAT(B.visited_at - interval 9 HOUR - 1 DAY, %Y-%m-%d)
WHERE A.visited_at >='2020-07-01'
AND A.visited_at<'2020-08-01'
GROUP BY 1 ;
-- 내가 쓴 답
SELECT DATE_FORMAT(visited_at - interval 9 HOUR, %Y-%m-%d) AS d_date, ROUND(COUNT(customer_id)/(SELECT COUNT(DISTINCT customer_id) FROM fastcampus.tbl_visit)*100,2) AS retention_rate
FROM(
SELECT customer_id, visited_at, MAX(visited_at)
FROM fastcampus.tbl_visit
WHERE visited_at >='2020-07-01'
AND visited_at<'2020-08-01'
GROUP BY customer_id
) AS t1
WHERE visited_at <>MAX(visited_at)
AND TIMEDIFF(visited_at, MAX(visited_at) =<24
Q21. 신규유저 : 20년 7월 신규유저가 많은지 기존유저가 많은지
가입기간별로 고객분포. DAU 기준으로
신규유저
일별 신규유저
WITH tbl_july AS (
SELECT customer_id
FROM fastcampus.tbl_customer
WHERE created_at >='2020-07-01'
ANS created_at<'2020-08-01';
)
일별 DAU
SELECT DATE_FORMAT(visited_at - INTERVAL 9 HOUR, '%Y-%m-%d'), COUNT(DISTINCT customer_id)
FROM fastcampus.tbl_visit
WHERE visited_at >='2020-07-01'
ANS visited_at<'2020-08-01'
GROUP BY 1;
기존 유저 (일별DAU - 7월 신규)
SELECT DATE_FORMAT(visited_at - INTERVAL 9 HOUR, '%Y-%m-%d')
, COUNT(DISTINCT customer_id) AS olduser
, (SELECT COUNT(DISTINCT B.customer_id) FROM tbl_july) AS newuser
FROM fastcampus.tbl_visit A
LEFT JOIN tbl_july B
ON A.customer_id = B.customer_id
WHERE A.visited_at >='2020-07-01'
ANS A.visited_at<'2020-08-01'
GROUP BY 1;
다시 풀기!! Q21 신규유저, 기존유저, 가입기간별로 고객 분포도 DAU기준
-- tbl_visit
WITH tbl_datediff AS (
SELECT DATE_FORMAT(A.visited_at - INTERVAL 9 HOUR, '%Y-%m-%d') AS d_date
, A.customer_id
, DATEDIFF(MAX(A.visited_at), B.created_at) AS date_diff
FROM tbl_visit AS A
LEFT JOIN tbl_customer AS B
ON A.customer_id = B.customer_id
WHERE A.visited_at >='2020-07-01'
AND A.visited_at <'2020-08-01'
GROUP BY 1, 2
)
SELECT A.d_date
, CASE WHEN A.date_diff < 31 THEN '~30'
WHEN A.date_diff < 61 THEN '~60'
ELSE '60~' END AS service_age
, COUNT(DISTINCT A.customer_id) AS users
, B.all_users
, ROUND(COUNT(DISTINCT A.customer_id) /B.all_users*100, 2) AS portion
FROM tbl_datediff AS A
LEFT JOIN (SELECT d_date, COUNT(customer_id) AS all_users FROM tbl_datediff GROUP BY 1) AS B
ON A.d_date = B.d_date
GROUP BY 1, 2
ORDER BY 1, 2 ;
'SQL' 카테고리의 다른 글
[SQL튜닝] SQL 성능 튜닝 기초 (INDEX, Cardinality, 점조건, 선분조건) (0) | 2022.06.28 |
---|---|
hackerrank (0) | 2022.06.27 |
실무 SQL 예제 (0) | 2022.06.27 |
Transaction 과 TCL (0) | 2022.06.27 |
권한과 DCL , GRANT, REVOKE (0) | 2022.06.27 |