춘식이2
업글이✧
춘식이2
전체 방문자
오늘
어제
  • 분류 전체보기
    • Data Analysis
    • Python
    • SQL
    • Statistics

블로그 메뉴

  • 홈
  • 태그
  • 방명록

공지사항

인기 글

태그

  • rank함수
  • NULL
  • uv
  • window함수
  • python3
  • ifnull
  • sql튜닝
  • Python
  • Apriori
  • coalesce
  • 장바구니분석
  • Codility
  • SQL
  • Jupyter
  • 데이터분석
  • hackerrank
  • 데이터디스커버리
  • pv
  • NVL
  • excel

최근 댓글

최근 글

티스토리

hELLO · Designed By 정상우.
춘식이2

업글이✧

SQL

실무 SQL 예제

2022. 6. 27. 14:27
Q1
SELECT SUM(price)
FROM tbl_purchase
WHERE purchased_at LIKE '2020-07%' ;
SELECT SUM(price)
FROM tbl_purchase
WHERE purchased_at >= '2020-07-01'
AND purchased_at< '2020-08-01';
Q2 2020 07의 MAU?
SELECT COUNT(DISTINCT customer_id)
FROM tbl_visit
WHERE visited_at LIKE '2020-07%';
#Q3 7월의 ACTIVE 유저의 paying rate?
SELECT COUNT(DISTINCT customer_id)/(SELECT COUNT(DISTINCT customer_id) FROM tbl_visit WHERE visited_at LIKE '2020-07%') AS paying_rate
FROM tbl_purchase
WHERE purchased_at LIKE '2020-07%' ;
#Q4 7월의 구매유저의 월평균 구매액
SELECT SUM(price)/COUNT( DISTINCT customer_id) AS avg_price
FROM tbl_purchase
WHERE purchased_at LIKE '2020-07%' ;

 

#Q5 7월에 가장 많이 구매한고객 TOP3,10-15
SELECT customer_id, RANK() OVER (ORDER BY SUM(price)) AS rank_july
FROM tbl_purchase
WHERE rank_july IN (1,2,3,10,11,12,13,14,15)
GROUP BY customer_id;

 

#Q6 20.07월 평균 DAU
SELECT AVG(
FROM tbl_visit
WHERE visited_at LIKE '2020-07%';
GROUP BY
#Q2 2020 07의 MAU?

SELECT COUNT(DISTINCT customer_id)
FROM tbl_visit
WHERE visited_at >='2020-07-01'
AND visited_at<'2020-08-01';
#Q3 7월의 ACTIVE 유저의 paying rate?

SELECT COUNT(DISTINCT customer_id)/(SELECT COUNT(DISTINCT customer_id) FROM tbl_visit WHERE visited_at >='2020-07-01'
AND visited_at <'2020-08-01') AS paying_rate

FROM tbl_purchase

WHERE purchased_at >='2020-07-01'
AND purchased_at <'2020-08-01' ;


SELECT ROUND(11174/16414*100,2) ;

 

#Q4 7월의 구매유저의 월평균 구매액 ARPPU
#Average Revenue Per Paying User 객단가

SELECT AVG(revenue)
FROM(SELECT SUM(price) AS revenue,
customer_id
FROM tbl_purchase
WHERE purchased_at >='2020-07-01'
AND purchased_at <'2020-08-01'
GROUP BY customer_id ) AS T1 ;




--내가 쓴 답
SELECT SUM(price)/COUNT( DISTINCT customer_id) AS arppu
FROM tbl_purchase
WHERE purchased_at >='2020-07-01'
AND purchased_at <'2020-08-01' ;
#Q5 7월에 가장 많이 구매한고객 TOP3,10-15

SELECT customer_id, SUM(price)
FROM tbl_purchase
GROUP BY customer_id
ORDER BY SUM(price) DESC
LIMIT 3;

SELECT customer_id, SUM(price)
FROM tbl_purchase
GROUP BY customer_id
ORDER BY SUM(price) DESC
LIMIT 5 OFFSET 9;



--내가 작성한 답
SELECT rank_july
FROM(SELECT customer_id, sum(price),
RANK() OVER (ORDER BY SUM(price)) AS
rank_july
FROM tbl_purchase
GROUP BY customer_id) AS t1
WHERE rank_july<4
OR (rank_july>=10 AND rank_july<16);

 

날짜 시간별 분석 문제 모음

SELECT NOW(); -- 연월일시분초

SELECT CURRENT_DATE(); --연/월/일까지만

SELECT EXTRACT(MONTH FROM '2021-12-01') -- 월 부분만

SELECT DAY('2021-01-13'); -- 일자만 뽑아줌

SELECT DAY_ADD('2021-01-01', INTERVAL 7 DAY); -- 7일 후인 1월 8일을 보여줌

SELECT DAY_SUB('2021-06-15', INTERVAL 7 DAY); -- 7일전인 6월 8일 보여줌

SELECT DATEDIFF('2021-06-25', '2021-06-15'); -- 두개 날짜 일수차이

SELECT TIMEDIFF('2021-06-25 12:10:00', '2021-06-25 10:10:00'); -- 두개 날짜 시간차이

SELECT DATE_FORMAT(NOW(), '%Y-%m-%d'); -- 날짜 형식 바꾸기

%W : 요일 Sunday, Monday, Tuesday ...
%a : 요일 SUN, MON, TUE

 

#Q6. 2020년 7월 평균 DAU, Active User 수가 증가하는 추세?

#<DAU 구하기>
SELECT DATE_FORMAT(visited_at - INTERVAL 9 hour, %Y-%m-%d) AS date_at
, COUNT(DISTINCT customer_id)
FROM fastcampus.tbl_visit
WHERE visited_at >='2020-07-01'
  AND visited_at <'2020-08-01'
GROUP BY 1



--내 답
SELECT AVG(activeuser)
FROM(
    SELECT DAY(visited_at) AS day,
    COUNT(DISTINCT customer_id) AS activeuser
    FROM fastcampus.tbl_visit
    WHERE visited_at >='2020-07-01'
      AND visited_at <'2020-08-01'
    GROUP BY 1) AS t1;


#<MAU구하기>
SELECT COUNT(DISTINCT customer_id)
FROM fastcampus.tbl_visit
WHERE visited_at >='2020-07-01'
  AND visited_at <'2020-08-01' ;
#Q7. 2020년 7월 평균 WAU 구해주세요

SELECT AVG(users)
FROM(
    SELECT DATE_FORMAT(visited_at - interval 9 hour, '%Y-%m-%U') as date_at
    COUNT(DISTINCT customer_id) AS     users
    FROM fastcampus.tbl_visit
    WHERE visited_at >='2020-07-05'
      AND visited_at <'2020-07-26'
    GROUP BY 1 ) AS t1

 

#Q8-1. 2020년 7월의 Daily Revenue는 증가하는 추세?
#평균 Daily Revenue

SELECT DATE_FORMAT(purchased_at -interval 9 hour, '%Y-%m-%d') AS days
, SUM(price) AS revenue
FROM fastcampus.tbl_purchase
WHERE purchased_at >='2020-07-01'
AND purchased_at < '2020-08-01'
GROUP BY 1
ORDER BY 1;

SELECT AVG(revenue)
FROM(
SELECT DATE_FORMAT(purchased_at -interval 9 hour, '%Y-%m-%d') AS days, SUM(price) AS revenue
FROM fastcampus.tbl_purchase
WHERE purchased_at >='2020-07-01'
AND purchased_at < '2020-08-01'
GROUP BY 1) AS t1;



#Q8-2. 2020년 7월의 평균 Weekly Revenue

SELECT AVG(revenue)
FROM(
    SELECT DATE_FORMAT(purchased_at -interval 9 hour, '%Y-%m-%U') AS days, SUM(price) AS revenue
    FROM fastcampus.tbl_purchase
    WHERE purchased_at >='2020-07-05'
    AND purchased_at < '2020-07-26'
    GROUP BY 1) AS t1;

 

#Q9. 20년 7월 요일별 Daily Revenue
#어느 요일이 Revenue 가 가장 높고 낮나요?

SELECT DATE_FORMAT(days, '%W') AS weeks, AVG(revenue)
FROM(
    SELECT DATE_FORMAT(purchased_at -
    interval 9 hour, '%Y-%m-%d') AS days,
    SUM(price) AS revenue
    FROM fastcampus.tbl_purchase
    WHERE purchased_at >='2020-07-01'
    AND purchased_at < '2020-08-01'
    GROUP BY 1) AS t1
GROUP BY 1
ORDER BY 2 DESC;

 

 

#Q10. 20년 7월 시간대별 시간당 총 Revenue
#어느시간대가 Revenue가 가장 높고 낮나요?

SELECT hours, AVG(revenue)
FROM(
    SELECT DATE_FORMAT(purchased_at -
    interval 9 hour, '%Y-%m-%d') AS days,
    DATE_FORMAT(purchased_at -
    interval 9 hour, '%H') AS hours,
    SUM(price) AS revenue
    FROM fastcampus.tbl_purchase
    WHERE purchased_at >='2020-07-01'
    AND purchased_at < '2020-08-01'
    GROUP BY 1, 2 ) AS t1
GROUP BY 1
ORDER BY 2 DESC ;
#Q11. 20년 7월 요일 및 시간대별 Revenue

SELECT dayofweeks, hours, AVG(revenue)
FROM(
	SELECT DATE_FORMAT(purchased_at - interval 9 hour, '%Y-%m-%d') AS days,
	DATE_FORMAT(purchased_at - interval 9 hour, '%W') AS dayofweeks,
	DATE_FORMAT(purchased_at - interval 9 hour, '%H') AS hours,
	SUM(price) AS revenue
	FROM fastcampus.tbl_purchase
	WHERE purchased_at >='2020-07-01'
	AND purchased_at < '2020-08-01'
	GROUP BY 1, 2, 3 ) AS t1
GROUP BY 1, 2
ORDER BY 3 DESC ;


SELECT dayofweeks, hours, AVG(activeusers)
FROM(
    SELECT DATE_FORMAT(visited_at - interval 9 hour, '%Y-%m-%d') AS dates,
    DATE_FORMAT(visited_at - interval 9 hour, '%W') AS dayofweeks,
    DATE_FORMAT(visited_at - interval 9 hour, '%H') AS hours
    COUNT(DISTINCT customer_id) AS activeusers
    FROM fastcampus.tbl_visit
    GROUP BY 1, 2 ,3 ) AS t1
GROUP BY 1, 2
ORDER BY 3 ;

 

Q12. 유저 세그멘트별 분석, 성별 연령

SELECT CASE WHEN length(gender)<1 THEN 'Others' ELSE gender END AS gender_group,
CASE
WHEN age =<15 THEN '15세이하'
WHEN age =<20 THEN '16~20세'
WHEN age =<25 THEN '21~25세'
WHEN age =<30 THEN '26~30세'
WHEN age =<35 THEN '31~35세'
WHEN age =<40 THEN '36~40세'
WHEN age =<45 THEN '41~45세'
WHEN age =<50 THEN '46~50세'
WHEN age =<55 THEN '51~55세'
ELSE '56세이상' ) AS age_group,
, COUNT(*)
FROM fastcampus.tbl_customer
GROUP BY 1, 2
ORDER BY 3 DESC;

--내가 쓴 답
SELECT (IF gender='F' THEN '여자'
ELSEIF gender='M' THEN '남자'
ELSE '기타'
END) AS gender_group
, (CASE
WHEN age =<10 THEN '10세이하'
WHEN age>10 AND age =<15 THEN '11~15세'
WHEN age>15 AND age =<20 THEN '16~20세'
WHEN age>20 AND age =<25 THEN '21~25세'
WHEN age>25 AND age =<30 THEN '26~30세'
WHEN age>30 AND age =<35 THEN '31~35세'
WHEN age>35 AND age =<40 THEN '36~40세'
WHEN age>40 AND age =<45 THEN '41~45세'
WHEN age>45 AND age =<50 THEN '46~50세'
WHEN age>50 AND age =<55 THEN '51~55세'
WHEN age>55 AND age =<60 THEN '56~60세'
WHEN age>61 THEN '60세이상' ) AS age_group,
COUNT( DISTINCT customer_id)
FROM fastcampus.tbl_customer
GROUP BY 1, 2
ORDER BY 3 DESC;
-- 먼저 만들떄 이렇게
SELECT DISTINCT gender
FROM fastcampus.tbl_customer

SELECT DISTINCT age
FROM fastcampus.tbl_customer
ORDER BY age
-- Q13. Q12결과 성 연령을 남성 25-29세와 같이 통합해 주시고, 각 성연령이 전체 고객에서 얼마나 차지하는지 분포(%)를 알려주세요. 분포가 높은 순서대로 알려주세요

SELECT count(*)
FROM fastcampus.tbl_customer

SELECT CONCAT(CASE WHEN length(gender)<1 THEN '기타' WHEN gender='M' THEN '남성' WHEN 'F' THEN '여성' END
, "("
, CASE
WHEN age =<15 THEN '15세이하'
WHEN age =<20 THEN '16~20세'
WHEN age =<25 THEN '21~25세'
WHEN age =<30 THEN '26~30세'
WHEN age =<35 THEN '31~35세'
WHEN age =<40 THEN '36~40세'
WHEN age =<45 THEN '41~45세'
WHEN age =<50 THEN '46~50세'
WHEN age =<55 THEN '51~55세'
ELSE '56세이상'
, ")" AS segment
, ROUND(count(*) /(SELECT count(*) FROM fastcampus.tbl_customer)*100, 2) AS portion
FROM fastcampus.tbl_customer
GROUP BY 1
ORDER BY 2 DESC,



--내가 쓴 답
SELECT CONCAT(gender_group, '(', age_group, ')') AS gender_age, COUNT(*)
FROM(
SELECT CASE WHEN length(gender)<1 THEN 'Others' ELSE gender END AS gender_group,
CASE
WHEN age =<15 THEN '15세이하'
WHEN age =<20 THEN '16~20세'
WHEN age =<25 THEN '21~25세'
WHEN age =<30 THEN '26~30세'
WHEN age =<35 THEN '31~35세'
WHEN age =<40 THEN '36~40세'
WHEN age =<45 THEN '41~45세'
WHEN age =<50 THEN '46~50세'
WHEN age =<55 THEN '51~55세'
ELSE '56세이상' ) AS age_group,
, COUNT(*)
FROM fastcampus.tbl_customer
GROUP BY 1, 2
) AS t1
GROUP BY 1;

 

Q14. 20년 7월, 성별에 따라 총 구매건수, 총 Revenue, 이전처럼 남여 이외의 성별은 하나로 묶어 주세요

SELECT IF length(t2.gender)<1 OR t2.gender='Others' THEN '기타' ELSE gender END, COUNT(t1. customer_id) AS cnt, SUM(t1. price) AS revenue
FROM fastcampus.tbl_purchase AS t1
LEFT JOIN fastcampus.tbl_customer AS t2
ON t1.customer_id=t2.customer_id
WHERE t1.purchased_at >='2020-07-01'
AND t1.purchased_at <'2020-08-01'
GROUP BY 1
ORDER BY 2,3 ;

--> WHERE 조건문은 JOIN 다음에 !!!
SELECT
FROM
LEFT JOIN
WHERE

 

 

Q15. 20년 7월 성별 연령대애 따라 구매건수와 총 REvenue 구해주세요


SELECT IF length(t2.gender)<1 OR t2.gender='Others' THEN '기타'
ELSE gender END AS '성별'
, CASE
WHEN t2.age =<15 THEN '15세이하'
WHEN t2.age =<20 THEN '16~20세'
WHEN t2.age =<25 THEN '21~25세'
WHEN t2.age =<30 THEN '26~30세'
WHEN t2.age =<35 THEN '31~35세'
WHEN t2.age =<40 THEN '36~40세'
WHEN t2.age =<45 THEN '41~45세'
WHEN t2.age =<50 THEN '46~50세'
WHEN t2.age =<55 THEN '51~55세'
ELSE '56세이상' AS '연령'
, COUNT(t1. customer_id) AS cnt
, SUM(t1. price) AS revenue

FROM fastcampus.tbl_purchase AS t1
LEFT JOIN fastcampus.tbl_customer AS t2
ON t1.customer_id=t2.customer_id

WHERE t1.purchased_at >='2020-07-01'
AND t1.purchased_at <'2020-08-01'
GROUP BY 1, 2
ORDER BY 3, 4 DESC ;

 

Q18 WITH, LAG함수 :20년 7월 일별 매출의 전일 대비 증감폭, 증감률을 구해주세요

--임시 테이블처럼 쓸수있음
WITH tbl_revenue AS (
SELECT DATE_FORMAT(purchased_at - INTERVAL 9 hour, '%Y-%m-%d') AS d_date, sum(price) AS revenue,
. DAY(purchased_at) - interval 1
FROM fastcampus.tbl_purchase
WHERE purchased_at >='2020-07-01'
AND purchased_at < '2020-08-01'
GROUP BY 1
)

SELECT d_date, revenue
, LAG(revenue) OVER (ORDER BY d_date ASC)
, revenue-LAG(revenue) OVER (ORDER BY d_date ASC) AS rev_diff
, ROUND((revenue-LAG(revenue) OVER (ORDER BY d_date ASC))/(LAG(revenue) OVER (ORDER BY d_date ASC))*100, 2)
FROM tbl_revenue

 

'SQL' 카테고리의 다른 글

hackerrank  (0) 2022.06.27
실무 SQL 예제 2  (0) 2022.06.27
Transaction 과 TCL  (0) 2022.06.27
권한과 DCL , GRANT, REVOKE  (0) 2022.06.27
CREATE TABLE , 제약조건(NOT NULL, UNIQUE, DEFAULT, PK, FK)  (0) 2022.06.27
    'SQL' 카테고리의 다른 글
    • hackerrank
    • 실무 SQL 예제 2
    • Transaction 과 TCL
    • 권한과 DCL , GRANT, REVOKE
    춘식이2
    춘식이2

    티스토리툴바