-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SunKyoung Moon Solution for Kickbones1 project #329
base: main
Are you sure you want to change the base?
Changes from all commits
4044012
151ae2e
b03bd55
4296a58
09ff97d
a123e19
0c97fe7
ac197c7
59845cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,44 +1,102 @@ | ||
library(ggplot2) library(dplyr) library(tidyr) | ||
# Load necessary libraries | ||
library(ggplot2) | ||
library(dplyr) | ||
|
||
\#Loading Data url <- | ||
“<https://raw.githubusercontent.com/Dr-Eberle-Zentrum/Data-projects-with-R-and-GitHub/refs/heads/main/Projects/SunKyoung%20Moon/kaggle_dataset.csv>” | ||
## | ||
## 다음의 패키지를 부착합니다: 'dplyr' | ||
|
||
data <- read.csv(url) head(data) | ||
## The following objects are masked from 'package:stats': | ||
## | ||
## filter, lag | ||
|
||
# Convert app usage time from minutes/day to hours/day | ||
## The following objects are masked from 'package:base': | ||
## | ||
## intersect, setdiff, setequal, union | ||
|
||
data <- data %>% mutate(AppUsageHours = `App.Usage.Time..min.day.` | ||
/ 60) | ||
library(tidyr) | ||
|
||
# Compute median app usage time and sum users for each device model | ||
# Load data | ||
# A path relative to this document keeps the script runnable on machines other | ||
# than the author's (the previous value was an absolute C:/ path). | ||
# NOTE(review): assumes kaggle_dataset.csv sits next to this file — confirm. | ||
path <- "kaggle_dataset.csv" | ||
data <- read.csv(path) | ||
head(data) | ||
|
||
device\_summary <- data %>% group\_by(`Device.Model`, Gender) | ||
%>% summarize( MedianAppUsage = median(AppUsageHours, na.rm = TRUE), | ||
UserCount = n() ) %>% ungroup() | ||
## User.ID Device.Model Operating.System App.Usage.Time..min.day. | ||
## 1 1 Google Pixel 5 Android 393 | ||
## 2 2 OnePlus 9 Android 268 | ||
## 3 3 Xiaomi Mi 11 Android 154 | ||
## 4 4 Google Pixel 5 Android 239 | ||
## 5 5 iPhone 12 iOS 187 | ||
## 6 6 Google Pixel 5 Android 99 | ||
## Screen.On.Time..hours.day. Battery.Drain..mAh.day. Number.of.Apps.Installed | ||
## 1 6.4 1872 67 | ||
## 2 4.7 1331 42 | ||
## 3 4.0 761 32 | ||
## 4 4.8 1676 56 | ||
## 5 4.3 1367 58 | ||
## 6 2.0 940 35 | ||
## Data.Usage..MB.day. Age Gender User.Behavior.Class | ||
## 1 1122 40 Male 4 | ||
## 2 944 47 Female 3 | ||
## 3 322 42 Male 2 | ||
## 4 871 20 Male 3 | ||
## 5 988 31 Female 3 | ||
## 6 564 31 Male 2 | ||
|
||
# Summarize age groups for scatter plot | ||
# Derive daily app usage in hours from the minutes-per-day column | ||
mutate_data <- mutate(data, AppUsageHours = App.Usage.Time..min.day. / 60) | ||
|
||
data <- data %>% mutate(AgeGroup = case\_when( Age >= 20 & Age | ||
<= 29 ~ “20-29”, Age >= 30 & Age <= 39 ~ “30-39”, Age >= 40 | ||
& Age <= 49 ~ “40-49”, Age >= 50 & Age <= 59 ~ “50-59”, TRUE ~ | ||
“Others” )) | ||
# Number of users per device model, largest group first | ||
device_user_counts <- group_by(mutate_data, Device.Model) %>% | ||
  summarize(UserCount = n()) %>% | ||
  arrange(-UserCount) | ||
|
||
\#Visualization part! ggplot(data, aes(x = Device.Model, y = | ||
AppUsageHours, fill = Gender)) + geom\_violin(alpha = 0.7, scale = | ||
“width”) + geom\_point(aes(color = AgeGroup), position = | ||
position\_jitter(width = 0.2, height = 0), alpha = 0.5) + | ||
scale\_fill\_manual(values = c(“Male” = “blue”, “Female” = “red”)) + | ||
scale\_color\_manual(values = c( “20-29” = “gray”, “30-39” = “green”, | ||
“40-49” = “pink”, “50-59” = “purple” )) + labs( title = “Mobile Device | ||
Usage for Different Models”, subtitle = “Median app usage time | ||
differentiated by gender and age groups”, x = “Device Model”, y = | ||
“Median App Usage Time (hours/day)”, fill = “Gender”, color = “Age | ||
Group” ) + theme\_minimal() | ||
# Turn Device.Model into a factor whose level order follows device_user_counts | ||
mutate_data <- mutate_data %>% | ||
  mutate( | ||
    Device.Model = factor(Device.Model, levels = device_user_counts$Device.Model) | ||
  ) | ||
|
||
device\_user\_counts <- data %>% group\_by(Device.Model) %>% | ||
summarize(UserCount = n()) | ||
# Add the user count to each device label for display on the x-axis | ||
mutate_data <- mutate_data %>% | ||
  left_join(device_user_counts, by = "Device.Model") %>% | ||
  mutate( | ||
    DeviceLabel = paste0(Device.Model, "\n", UserCount, " Users"), | ||
    # paste0() returns plain character, which a discrete axis sorts | ||
    # alphabetically; reorder() makes the label a factor whose levels run from | ||
    # the most-used device to the least-used one. | ||
    DeviceLabel = reorder(DeviceLabel, -UserCount) | ||
  ) | ||
|
||
ggplot(device\_user\_counts, aes(x = Device.Model, y = UserCount)) + | ||
geom\_col(fill = “lightblue”) + geom\_text(aes(label = UserCount), vjust | ||
= -0.5) + labs( title = “User Count per Device Model”, x = “Device | ||
Model”, y = “User Count” ) + theme\_minimal() | ||
# Bucket every user into a ten-year age band used to colour the points | ||
age_data <- mutate_data %>% | ||
  mutate( | ||
    AgeGroup = case_when( | ||
      Age %in% 20:29 ~ "20-29", | ||
      Age %in% 30:39 ~ "30-39", | ||
      Age %in% 40:49 ~ "40-49", | ||
      Age %in% 50:59 ~ "50-59", | ||
      TRUE ~ "Others" # Fallback for every age not matched above | ||
    ) | ||
  ) | ||
|
||
# Visualization: violin plot per device label and gender, with the individual | ||
# users overlaid as jittered points coloured by age group. | ||
# NOTE(review): y shows per-user AppUsageHours while the labels say "Median" — | ||
# confirm the intended wording. | ||
ggplot(age_data, aes(x = DeviceLabel, y = AppUsageHours, fill = Gender)) + | ||
  geom_violin(alpha = 0.7, scale = "width", position = position_dodge(width = 0.8)) + | ||
  geom_point( | ||
    aes(color = AgeGroup), | ||
    # Same dodge width as the violins so the points sit on their own gender | ||
    position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.8), | ||
    alpha = 0.5 | ||
  ) + | ||
  scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) + | ||
  scale_color_manual(values = c( | ||
    "20-29" = "gray", | ||
    "30-39" = "green", | ||
    "40-49" = "pink", | ||
    "50-59" = "purple", | ||
    # AgeGroup can also be "Others"; without an entry those points fall back | ||
    # to the scale's NA colour and get no legend key. | ||
    "Others" = "orange" | ||
  )) + | ||
  labs( | ||
    title = "Mobile Device Usage for Different Models", | ||
    subtitle = "Median app usage time differentiated by gender and age groups\nUser count displayed on x-axis", | ||
    x = "Device Model", | ||
    y = "Median App Usage Time (hours/day)", | ||
    fill = "Gender", | ||
    color = "Age Group" | ||
  ) + | ||
  theme_minimal() + | ||
  # Tilt the two-line device labels so neighbouring labels do not overlap | ||
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) | ||
|
||
![](SunKyoung-Moon_Luis-proj_files/figure-markdown_strict/load-libraries-1.png) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
--- | ||
title: "kickbones1_from SunKyoung Moon(first trial)" | ||
author: "Luis" | ||
output: md_document | ||
date: "2024-12-03" | ||
--- | ||
|
||
# Result Image | ||
![Result](solution for kickbones1(first trial).png) | ||
|
||
```{r, echo=FALSE, out.width="50%"} | ||
knitr::include_graphics("solution for kickbones1(first trial).png") | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
\#Result Image ![Result](solution%20for%20kickbones1(first%20trial).png) | ||
|
||
\`\`\`r | ||
<img src="solution for kickbones1(first trial).png" width="50%" /> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
--- | ||
title: "kickbones1_from SunKyoung Moon(first trial)" | ||
author: "Luis" | ||
output: md_document | ||
date: "2024-12-03" | ||
--- | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are missing the code block start and end markers (three backticks). Please double-check the R Markdown chapter from the beginning of the course! |
||
library(ggplot2) | ||
library(dplyr) | ||
library(tidyr) | ||
|
||
# Address of the GitHub-hosted CSV file | ||
url <- "https://raw.githubusercontent.com/Dr-Eberle-Zentrum/Data-projects-with-R-and-GitHub/refs/heads/main/Projects/SunKyoung%20Moon/kaggle_dataset.csv" | ||
|
||
# Read the data set from that address into a data frame | ||
data <- read.csv(file = url) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. since the data is within the same folder:
|
||
head(data) | ||
|
||
# Convert app usage time from minutes/day to hours/day | ||
data <- data %>% | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Recommendation: don't overwrite your data! Keep the raw data and the modified data in separate objects in case you need both data sets, although typically that is not needed. |
||
mutate(AppUsageHours = `App.Usage.Time..min.day.` / 60) | ||
|
||
# Median app usage time (hours/day) and number of users for every | ||
# combination of device model and gender | ||
device_summary <- data %>% | ||
  group_by(Device.Model, Gender) %>% | ||
  summarize( | ||
    MedianAppUsage = median(AppUsageHours, na.rm = TRUE), | ||
    UserCount = n(), | ||
    # Return an ungrouped result directly; this replaces the trailing | ||
    # ungroup() and avoids summarize()'s regrouping message. | ||
    .groups = "drop" | ||
  ) | ||
|
||
# Summarize age groups for scatter plot | ||
data <- data %>% | ||
mutate(AgeGroup = case_when( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Move this up into the pipeline after line 20; don't split your data wrangling. |
||
Age >= 20 & Age <= 29 ~ "20-29", | ||
Age >= 30 & Age <= 39 ~ "30-39", | ||
Age >= 40 & Age <= 49 ~ "40-49", | ||
Age >= 50 & Age <= 59 ~ "50-59", | ||
TRUE ~ "Others" | ||
)) | ||
|
||
# Visualization: violin plot per device model and gender, with the individual | ||
# users overlaid as jittered points coloured by age group. | ||
# NOTE(review): y shows per-user AppUsageHours while the labels say "Median" — | ||
# confirm the intended wording. | ||
ggplot(data, aes(x = Device.Model, y = AppUsageHours, fill = Gender)) + | ||
  geom_violin(alpha = 0.7, scale = "width") + | ||
  geom_point( | ||
    aes(color = AgeGroup), | ||
    # The violins are dodged by Gender; dodge the points the same way so each | ||
    # point lands on its own gender's violin instead of between the two. | ||
    position = position_jitterdodge( | ||
      jitter.width = 0.2, | ||
      jitter.height = 0, | ||
      dodge.width = 0.9 | ||
    ), | ||
    alpha = 0.5 | ||
  ) + | ||
  scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) + | ||
  scale_color_manual(values = c( | ||
    "20-29" = "gray", | ||
    "30-39" = "green", | ||
    "40-49" = "pink", | ||
    "50-59" = "purple", | ||
    # AgeGroup can also be "Others"; without an entry those points fall back | ||
    # to the scale's NA colour and get no legend key. | ||
    "Others" = "orange" | ||
  )) + | ||
  labs( | ||
    title = "Mobile Device Usage for Different Models", | ||
    subtitle = "Median app usage time differentiated by gender and age groups", | ||
    x = "Device Model", | ||
    y = "Median App Usage Time (hours/day)", | ||
    fill = "Gender", | ||
    color = "Age Group" | ||
  ) + | ||
  theme_minimal() | ||
|
||
# Number of users recorded for each device model | ||
device_user_counts <- summarize(group_by(data, Device.Model), UserCount = n()) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't store the data in a variable just for the subsequent visualization. Pipe directly from the data, via your changes, into the ggplot call; that way, fewer things can go wrong! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And you have fewer variables. ;) |
||
# Bar chart of the user count per device model, each bar labelled with its count | ||
ggplot(device_user_counts, aes(x = Device.Model, y = UserCount)) + | ||
  geom_col(fill = "lightblue") + | ||
  # vjust = -0.5 lifts each count just above the top of its bar | ||
  geom_text(aes(label = UserCount), vjust = -0.5) + | ||
  ggtitle("User Count per Device Model") + | ||
  xlab("Device Model") + | ||
  ylab("User Count") + | ||
  theme_minimal() | ||
|
||
#Result | ||
|
||
![visualization_Result](https://imgur.com/jni0RwO) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't upload files somewhere else. They should be generated within the Rmd script and subsequently added to the GitHub repo! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So please:
|
||
![Visualization_Result2](https://imgur.com/DOEtSmH) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
library(ggplot2) library(dplyr) library(tidyr) | ||
|
||
\#Loading Data url <- | ||
“<https://raw.githubusercontent.com/Dr-Eberle-Zentrum/Data-projects-with-R-and-GitHub/refs/heads/main/Projects/SunKyoung%20Moon/kaggle_dataset.csv>” | ||
|
||
data <- read.csv(url) head(data) | ||
|
||
# Convert app usage time from minutes/day to hours/day | ||
|
||
data <- data %>% mutate(AppUsageHours = `App.Usage.Time..min.day.` | ||
/ 60) | ||
|
||
# Compute median app usage time and sum users for each device model | ||
|
||
device\_summary <- data %>% group\_by(`Device.Model`, Gender) | ||
%>% summarize( MedianAppUsage = median(AppUsageHours, na.rm = TRUE), | ||
UserCount = n() ) %>% ungroup() | ||
|
||
# Summarize age groups for scatter plot | ||
|
||
data <- data %>% mutate(AgeGroup = case\_when( Age >= 20 & Age | ||
<= 29 ~ “20-29”, Age >= 30 & Age <= 39 ~ “30-39”, Age >= 40 | ||
& Age <= 49 ~ “40-49”, Age >= 50 & Age <= 59 ~ “50-59”, TRUE ~ | ||
“Others” )) | ||
|
||
\#Visualization part! ggplot(data, aes(x = Device.Model, y = | ||
AppUsageHours, fill = Gender)) + geom\_violin(alpha = 0.7, scale = | ||
“width”) + geom\_point(aes(color = AgeGroup), position = | ||
position\_jitter(width = 0.2, height = 0), alpha = 0.5) + | ||
scale\_fill\_manual(values = c(“Male” = “blue”, “Female” = “red”)) + | ||
scale\_color\_manual(values = c( “20-29” = “gray”, “30-39” = “green”, | ||
“40-49” = “pink”, “50-59” = “purple” )) + labs( title = “Mobile Device | ||
Usage for Different Models”, subtitle = “Median app usage time | ||
differentiated by gender and age groups”, x = “Device Model”, y = | ||
“Median App Usage Time (hours/day)”, fill = “Gender”, color = “Age | ||
Group” ) + theme\_minimal() | ||
|
||
device\_user\_counts <- data %>% group\_by(Device.Model) %>% | ||
summarize(UserCount = n()) | ||
|
||
ggplot(device\_user\_counts, aes(x = Device.Model, y = UserCount)) + | ||
geom\_col(fill = “lightblue”) + geom\_text(aes(label = UserCount), vjust | ||
= -0.5) + labs( title = “User Count per Device Model”, x = “Device | ||
Model”, y = “User Count” ) + theme\_minimal() | ||
|
||
\#Result ![visualization\_Result](https://imgur.com/jni0RwO) | ||
![Visualization\_Result2](https://imgur.com/DOEtSmH) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Regarding file names: don't put "first trial", "version X", etc. into file names. This is why we use a version control system like git, which manages such meta information via its history or versioning. Please rename!