-
Notifications
You must be signed in to change notification settings - Fork 0
/
7.1_ProjectTwo.sql
150 lines (131 loc) · 5.36 KB
/
7.1_ProjectTwo.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# noinspection SqlNoDataSourceInspectionForFile
-- Scenario
-- The product manager of Quantigration has asked your data analytics team for a report summarizing your analysis of
-- the return merchandise authorizations (RMAs) that have been received. These are the same data sets that you’ve
-- already been working with. Your report should focus on summarizing the analysis and presenting your findings to the
-- product manager.
-- Directions
-- RMA Report
-- In your report, respond to the manager’s requests: to summarize the data you’ve been working with and to identify
-- key information that will help the company streamline operations. Remember, not everyone who reviews this report
-- will have a technical background.
-- 1. Begin by writing SQL commands to capture usable data (which you’ve preloaded into Codio) for your analysis.
-- 2. Specifically, the product manager wants you to analyze the following:
-- Analyze the number of returns by state and describe your findings in your report.
-- Analyze the percentage of returns by product type and describe your findings in your report.
-- 3. In your report, clearly summarize your analysis of the data for stakeholders. Include screenshots of the results
-- of each query. When summarizing results, you may want to consider the following questions:
-- How does the data provide the product manager with usable information?
-- What are the potential flaws in the data that has been presented?
-- Are there any limitations on your conclusions, or any other ways of looking at it that you haven’t considered?
-- Clearly communicate your findings to stakeholders.
USE QuantigrationUpdates;
-- Before analyzing the tables, I need to first fix RMA.Reason
-- Create RMABackup table
CREATE TABLE RMABackup (
RmaID INT UNSIGNED PRIMARY KEY,
OrderID INT UNSIGNED,
Step VARCHAR(50),
Status VARCHAR(15),
Reason VARCHAR(15)
);
-- Load the source data, rma.csv, into the RMABackup table
LOAD DATA INFILE '/home/codio/workspace/rma.csv'
INTO TABLE RMABackup
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';
-- Update RMABackup.Reason, correct the issues with each data string:
-- 'ejected' = 'Rejected',
-- 'efective' = 'Defective',
-- 'ncorrect' = 'Incorrect',
-- 'ther' = 'Other'
UPDATE RMABackup
SET Reason = 'Rejected'
WHERE Reason LIKE '%ejected%';
UPDATE RMABackup
SET Reason = 'Defective'
WHERE Reason LIKE '%efective%';
UPDATE RMABackup
SET Reason = 'Incorrect'
WHERE Reason LIKE '%ncorrect%';
UPDATE RMABackup
SET Reason = 'Other'
WHERE Reason LIKE '%ther%';
-- Update the RMA.reason field, using the data from RMABackup
UPDATE RMA
JOIN RMABackup AS backup
ON RMA.RmaID = backup.RmaID
SET RMA.Reason = backup.reason;
-- Update the State field of Collaborators for only the 4 abbreviated states: CA, FL, NC, and NY
UPDATE Collaborators
SET State = CASE
WHEN State = 'CA' THEN 'California'
WHEN State = 'FL' THEN 'Florida'
WHEN State = 'NC' THEN 'North Carolina'
WHEN State = 'NY' THEN 'New York'
ELSE State
END
WHERE LENGTH(State) <= 2;
-- ---------------------------------------------------------------------------------------------
-- Now, on to the main analysis:
-- 1. Analyze the number of returns by state
SELECT
Col.State,
COUNT(RMA.RmaID) AS RMA_Totals
FROM Collaborators AS Col
INNER JOIN Orders AS Ord
ON Col.CustomerID = Ord.CustomerID
INNER JOIN RMA
ON Ord.OrderID = RMA.OrderID
WHERE RMA.Reason != 'Rejected'
GROUP BY Col.State
ORDER BY RMA_Totals DESC;
-- 2. Analyze the percentage of returns by product type
SELECT
Orders.Sku,
COUNT(RMA.RmaID) AS RMAs_per_SKU,
(COUNT(RMA.RmaID) / (SELECT COUNT(*) FROM RMA)) AS Perc_of_RMAs
FROM Orders
INNER JOIN RMA
ON Orders.OrderID = RMA.OrderID
GROUP BY Orders.Sku
ORDER BY Perc_of_RMAs DESC;
-- ----------------------------------------------------------------
-- ----------------------------------------------------------------
-- Miscellaneous queries
-- This query did not produce the results I expected, the resulting table was not easily readable.
SELECT
Col.State,
COUNT(RMABackup.Reason) AS RMA_Reason_Counts,
RMABackup.Reason,
COUNT(Ord.OrderID) AS Total_Orders
FROM Collaborators AS Col
INNER JOIN Orders AS Ord
ON Col.CustomerID = Ord.CustomerID
INNER JOIN RMABackup
ON Ord.OrderID = RMABackup.OrderID
GROUP BY Col.State, RMABackup.Reason
ORDER BY Col.State, RMABackup.Reason;
-- This is the corrected query
SELECT
Col.State,
COUNT(DISTINCT(Ord.OrderID)) AS Order_Counts,
(SELECT COUNT(RMA2.RmaID)
FROM Collaborators AS Col2
INNER JOIN Orders AS Ord2
ON Col2.CustomerID = Ord2.CustomerID
INNER JOIN RMA AS RMA2
ON Ord2.OrderID = RMA2.OrderID
WHERE Col2.State = Col.State) AS RMA_Counts,
((SELECT COUNT(RMA3.OrderID)
FROM Collaborators AS Col3
INNER JOIN Orders AS Ord3
ON Col3.CustomerID = Ord3.CustomerID
INNER JOIN RMA AS RMA3
ON Ord3.OrderID = RMA3.OrderID
WHERE Col3.State = Col.State AND RMA3.Reason <> 'Rejected') / COUNT(Ord.OrderID) * 100) AS RMA_Percentage_by_state_orders
FROM Collaborators AS Col
INNER JOIN Orders AS Ord
ON Col.CustomerID = Ord.CustomerID
GROUP BY Col.State
ORDER BY Order_Counts DESC;