-
Notifications
You must be signed in to change notification settings - Fork 1
/
run_analysis.r
32 lines (25 loc) · 1.77 KB
/
run_analysis.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# This script merges the training and test data and writes a file containing
# the mean of each selected feature for every subject/activity combination.
# The output is sorted by subject and then by activity (in the order listed in
# "activity_labels.txt").
# We first perform feature selection, i.e. we keep only the features whose
# names contain std() or mean().
# Note that meanFreq() is deliberately excluded, because I think it is a
# weighted average, which carries a different meaning.
# However, if your interpretation is different from mine, you just have to
# change the grep call below to grep("std|mean",features$V2).
# Read the feature names (column V2) and keep only the indices of features
# whose name contains the literal text "std()" or "mean()".
features <- read.table("features.txt", stringsAsFactors = FALSE)
subfeatures <- grep("std\\(\\)|mean\\(\\)", features$V2)
# Fail early with a clear message instead of producing a bogus column range
# further down when nothing matches.
if (length(subfeatures) == 0) {
  stop("no std()/mean() features found in features.txt", call. = FALSE)
}

# Combine the training and test measurements, keeping only the selected
# feature columns. drop = FALSE keeps a data frame even for a single feature.
X <- rbind(
  read.table("train/X_train.txt")[, subfeatures, drop = FALSE],
  read.table("test/X_test.txt")[, subfeatures, drop = FALSE]
)
# as.character() guards against V2 having been read as a factor (R < 4.0).
colnames(X) <- as.character(features[subfeatures, 2])

# Combine the activity ids of training and test data and map each id to its
# label; the factor levels follow the row order of "activity_labels.txt".
Y <- rbind(read.table("train/y_train.txt"), read.table("test/y_test.txt"))
activity_labels <- read.table("activity_labels.txt", stringsAsFactors = FALSE)
Y$activity_labels <- factor(
  Y$V1,
  levels = activity_labels$V1,
  labels = activity_labels$V2
)

# Combine the subject ids of training and test data.
subject <- rbind(
  read.table("train/subject_train.txt"),
  read.table("test/subject_test.txt")
)

# All three tables must describe the same observations, row for row.
stopifnot(nrow(X) == nrow(Y), nrow(X) == nrow(subject))

# Pair every observation's subject with its activity label. The column is
# referenced by its full name rather than relying on `$` partial matching.
extracolumns <- data.frame(
  subject = subject$V1,
  activity = Y[["activity_labels"]]
)

# Combine everything: subject, activity, then the selected features.
# check.names = FALSE preserves feature names such as "tBodyAcc-mean()-X".
firstdata <- data.frame(extracolumns, X, check.names = FALSE)

# Step 5: average every feature for each subject/activity combination.
feature_cols <- setdiff(names(firstdata), c("subject", "activity"))
tidydata <- aggregate(
  firstdata[, feature_cols, drop = FALSE],
  by = list(subject = firstdata$subject, activity = firstdata$activity),
  FUN = mean
)
colnames(tidydata)[1:2] <- c("subject", "activity")
# Sort by subject, then by activity (factor level order).
tidydata <- tidydata[order(tidydata[, 1], tidydata[, 2]), ]
write.table(tidydata, file = "tidydata.txt", row.names = FALSE)