计算每月数据的平均值和中位数,并将其转换为htmltable R [英] Calculating Average and median on Monthly data and convert in htmltable R
问题描述
我有一个DF:
my_data< - read.table(text =
ID Date1 T1日期2 Val1
A-1'2018-01-10 15:05:24'A 2018-01-15 10
A-2'2018-01-05 14:15:22'B 2018- 01-14 12
A-3'2018-01-04 13:20:21'A 2018-01-13 15
A-4'2018-01-01 18:35:45'B 2018-01-12 22
A-5'2017-12-28 19:45:10'A 2018-01-11 18
A-6'2017-12-10 08:03:29 'A 2018-01-10 21
A-7'2017-12-06 20:55:55'A 2018-01-09 28
A-8'2018-01-10 10:02 :12'A 2018-01-15 10
A-9'2018-01-05 17:15:14'B 2018-01-14 12
A-10'2018-01-04 18 :35:58'A 2018-01-13 15
A-11'2018-01-01 21:09:25'B 2018-01-12 22
A-12'2017-12-28 02:12:22 'A 2018-01-11 18
A-13'2017-12-10 03:45:44'A 2018-01-10 21
A-14'2017-12-06 07:15 :25'A 2018-01-09 28
A-18'2017-10-07 08:02:84 B 2017-11-05 20
A-21'2017-10-01 06: 04:04 A 2017-10-20 15
A-51'2017-09-20 08:07:06 A 2017-09-28 10
A-35'2017-09-14 08: 02:45 A 2017-09-25 20
A-30'2017-08-10 15:03:08 A 2017-08-30 25,
header = TRUE,stringsAsFactors = FALSE)
并运行下面提到的代码,我得到的输出如下所示:
table_2< - 合并(
my_data %>%
mutate(Date2 = ymd(Date2))%>%
arrange(Date2)%>%
mutate(Month = paste(month(ymd_hms(Date1), (%)$%
过滤器(T1 =A)%>%
group_by(月)%>%>
总结(A =的总数= n(),
A =总和(Val1))%>%
mutate(MOM Growth#of A= round (应用(cbind(`#A',lag( - `#of A`)),
1,sum,na.rm = TRUE)/ lag(`#of A`),2),
MOM成长总和A= round(apply(cbind(`A sum of A`,lag( - `sum of A`)),
1,sum,na.rm = TRUE)/ lag ('A'总和)* 100,2))%>%
select(Month,`#of A`,`MOM Growth#of A`,`Sum of A`,`MOM Growth of sum (Date2))%>%
arrange(Date2)%>%
mutate(Month =粘贴(month(ymd_hms(Date1),label = TRUE),year(Date1),sep = - ))%>%
过滤器(T1 ==B)%>%
group_by(Month)%>%
summary(#of B= n(),
B =(B',#(B')),$ b $ = sum(Val1))%>%
mutate(MOM Growth#of B= round (b'*,b',sum,na.rm = TRUE)/ lag(`#of B'* 100),2),
MOM Growth of B= round (B'的总和)),
1,sum,na.rm = TRUE)/ lag('B'的总和)* 100),2)%>%
select月,`#B',`MOM增长B',`B'总和,`MOM B'增长总和),
by =Month,
all = TRUE)
table_2 [is.na(table_2)] < -
输出(table_2):
现在我想要再添加两列 A的中位数
,在 A的状态
头部和 B $ c $的中位数
I have a DF:
my_data <- read.table(text =
"ID Date1 T1 Date2 Val1
A-1 '2018-01-10 15:05:24' A 2018-01-15 10
A-2 '2018-01-05 14:15:22' B 2018-01-14 12
A-3 '2018-01-04 13:20:21' A 2018-01-13 15
A-4 '2018-01-01 18:35:45' B 2018-01-12 22
A-5 '2017-12-28 19:45:10' A 2018-01-11 18
A-6 '2017-12-10 08:03:29' A 2018-01-10 21
A-7 '2017-12-06 20:55:55' A 2018-01-09 28
A-8 '2018-01-10 10:02:12' A 2018-01-15 10
A-9 '2018-01-05 17:15:14' B 2018-01-14 12
A-10 '2018-01-04 18:35:58' A 2018-01-13 15
A-11 '2018-01-01 21:09:25' B 2018-01-12 22
A-12 '2017-12-28 02:12:22' A 2018-01-11 18
A-13 '2017-12-10 03:45:44' A 2018-01-10 21
A-14 '2017-12-06 07:15:25' A 2018-01-09 28
A-18 '2017-10-07 08:02:84 B 2017-11-05 20
A-21 '2017-10-01 06:04:04 A 2017-10-20 15
A-51 '2017-09-20 08:07:06 A 2017-09-28 10
A-35 '2017-09-14 08:02:45 A 2017-09-25 20
A-30 '2017-08-10 15:03:08 A 2017-08-30 25",
header = TRUE, stringsAsFactors = FALSE)
and running below mentioned code i get output as shown in below:
table_2 <- merge(
my_data %>%
mutate(Date2 = ymd(Date2)) %>%
arrange(Date2) %>%
mutate(Month = paste(month(ymd_hms(Date1), label = TRUE), year(Date1), sep = "-")) %>%
filter(T1 == "A") %>%
group_by(Month) %>%
summarise("# of A" = n(),
"sum of A" = sum(Val1)) %>%
mutate("MOM Growth # of A" = round(apply(cbind(`# of A`, lag(- `# of A`)),
1, sum, na.rm = TRUE) / lag(`# of A`), 2),
"MOM Growth sum of A" = round(apply(cbind(`sum of A`, lag(- `sum of A`)),
1, sum, na.rm = TRUE) / lag(`sum of A`) * 100, 2)) %>%
select(Month, `# of A`, `MOM Growth # of A`, `sum of A`, `MOM Growth sum of A`),
my_data %>%
mutate(Date2 = ymd(Date2)) %>%
arrange(Date2) %>%
mutate(Month = paste(month(ymd_hms(Date1), label = TRUE), year(Date1), sep = "-")) %>%
filter(T1 == "B") %>%
group_by(Month) %>%
summarise("# of B" = n(),
"sum of B" = sum(Val1)) %>%
mutate("MOM Growth # of B" = round(apply(cbind(`# of B`, lag(- `# of B`)),
1, sum, na.rm = TRUE) / lag(`# of B` * 100), 2),
"MOM Growth sum of B" = round(apply(cbind(`sum of B`, lag(- `sum of B`)),
1, sum, na.rm = TRUE) / lag(`sum of B`) * 100), 2) %>%
select(Month, `# of B`, `MOM Growth # of B`, `sum of B`, `MOM Growth sum of B`),
by = "Month",
all = TRUE)
table_2[is.na(table_2)] <- ""
Output (table_2):
Now i want to add two more column Median of A
, Avg Time of A
under Status of A
head and Median of B
, Avg Time of B
in Status of B
head. and convert those output in htmltable format.
Just wanted to know how to tweak code in summarise
to calculate these value on monthly data.
in addition, month should be in sequence order in output data & if there is any month missing between Max
month and Min
month than that month should come with all value 0 except MOM Growth
for both Status of A
and Status of B
because that should be than -100%.
Here is an attempt using:
library(dplyr)
library(lubridate)
library(tableHTML)
I have added a median and avg time column for A
and B
, plus added the MOM Growth
condition, and ensured that the Months are 1) in the correct order and 2) complete, even if no data is available in a particular month.
In order to get all Months, create a date sequence from the first to the last date in your data (with 1 month step in between). Then make sure both groups A
and B
have a date (because there is a filter later on):
date_range = expand.grid(Date1 = seq(min(ymd_hms(my_data$Date1)), max(ymd_hms(my_data$Date1)),
by = "1 month"),
T1 = c("A", "B"),
stringsAsFactors = FALSE)
table_2 <- merge(
my_data %>%
mutate(Date2 = ymd(Date2),
Date1 = ymd_hms(Date1)) %>%
full_join(date_range, by = c("Date1", "T1")) %>% # join date ranges to table
arrange(Date1) %>% # sort by date
mutate(Month = paste(month(Date1, label = TRUE), year(Date1), sep = "-"),
row_number = row_number(), # create row_numbers to keep up order
Val1 = coalesce(Val1, 0L)) %>% # replace NA with 0 in Val1
filter(T1 == "A") %>%
group_by(Month) %>%
summarise("# of A" = n(),
"sum of A" = sum(Val1, na.rm = TRUE),
"Median of A" = median(Val1, na.rm = TRUE), # compute median
"Avg Time of A" = round(mean(difftime(Date2, Date1),
na.rm = TRUE), # compute avg time
2),
row_number = min(row_number)) %>% # get min row number
arrange(row_number) %>% # sort by row number (to sort months)
mutate("MOM Growth # of A" = round(apply(cbind(`# of A`, lag(- `# of A`)),
1, sum, na.rm = TRUE) / lag(`# of A`), 2),
"MOM Growth sum of A" = round(apply(cbind(`sum of A`, lag(- `sum of A`)),
1, sum, na.rm = TRUE) / lag(`sum of A`) * 100, 2)) %>%
mutate("MOM Growth # of A" = if_else(is.infinite(`MOM Growth # of A`), 100, `MOM Growth # of A`), # replace Inf with 100
"MOM Growth sum of A" = if_else(is.infinite(`MOM Growth sum of A`), 100, `MOM Growth sum of A`)) %>%
select(Month, `# of A`, `MOM Growth # of A`,
`sum of A`, `MOM Growth sum of A`,
`Median of A`, `Avg Time of A`),
my_data %>%
mutate(Date2 = ymd(Date2),
Date1 = ymd_hms(Date1)) %>%
full_join(date_range, by = c("Date1", "T1")) %>%
arrange(Date1) %>%
mutate(Month = paste(month(Date1, label = TRUE), year(Date1), sep = "-"),
row_number = row_number(),
Val1 = coalesce(Val1, 0L)) %>%
filter(T1 == "B") %>%
group_by(Month) %>%
summarise("# of B" = n(),
"sum of B" = sum(Val1, na.rm = TRUE),
"Median of B" = median(Val1, na.rm = TRUE),
"Avg Time of B" = round(mean(difftime(Date2, Date1),
na.rm = TRUE),
2),
row_number = min(row_number)) %>%
arrange(row_number) %>%
mutate("MOM Growth # of B" = round(apply(cbind(`# of B`, lag(- `# of B`)),
1, sum, na.rm = TRUE) / lag(`# of B`), 2),
"MOM Growth sum of B" = round(apply(cbind(`sum of B`, lag(- `sum of B`)),
1, sum, na.rm = TRUE) / lag(`sum of B`) * 100, 2)) %>%
mutate("MOM Growth # of B" = if_else(is.infinite(`MOM Growth # of B`), 100, `MOM Growth # of B`),
"MOM Growth sum of B" = if_else(is.infinite(`MOM Growth sum of B`), 100, `MOM Growth sum of B`)) %>%
select(Month, `# of B`, `MOM Growth # of B`,
`sum of B`, `MOM Growth sum of B`,
`Median of B`, `Avg Time of B`),
by = "Month",
all = TRUE,
sort = FALSE) # do not sort by ID column to keep month order
'Remove' missing values:
table_2[is.na(table_2)] = ""
Create a tableHTML
from that data:
table_2 %>% tableHTML(rownames = FALSE,
widths = rep(100, 13),
second_headers = list(c(1, 4, 4),
c("", "Status of A", "Status of B")),
caption = "A & B consolidated") %>%
add_css_caption(css = list(c("font-weight", "border"),
c("bold", "1px solid black")))
And this is the result:
这篇关于计算每月数据的平均值和中位数,并将其转换为htmltable R的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!