Blog Notes
library(ggplot2)
library(forcats)
library(hexbin)
# Install devtools only when it is missing, so re-running these notes does not
# reinstall the package (and hit the network) on every run.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
## Installing package into '/home/abaryiames/R/x86_64-pc-linux-gnu-library/3.5'
## (as 'lib' is unspecified)
# Install bbplot from GitHub only when it is not already available, instead of
# downloading and rebuilding it every time the notes are run.
if (!requireNamespace("bbplot", quietly = TRUE)) {
  devtools::install_github("bbc/bbplot")
}
## Downloading GitHub repo bbc/bbplot@master
## stringr (1.3.1 -> 1.4.0) [CRAN]
## Installing 1 packages: stringr
## Installing package into '/home/abaryiames/R/x86_64-pc-linux-gnu-library/3.5'
## (as 'lib' is unspecified)
##
checking for file ‘/tmp/RtmpP9d6IB/remotes141749c3c44b/bbc-bbplot-82af595/DESCRIPTION’ ...
✔ checking for file ‘/tmp/RtmpP9d6IB/remotes141749c3c44b/bbc-bbplot-82af595/DESCRIPTION’
##
─ preparing ‘bbplot’:
##
checking DESCRIPTION meta-information ...
─ checking DESCRIPTION meta-information ...It is recommended to use ‘given’ instead of ‘middle’.
##
It is recommended to use ‘given’ instead of ‘middle’.
## It is recommended to use ‘given’ instead of ‘middle’.
##
OK
##
It is recommended to use ‘given’ instead of ‘middle’.
##
It is recommended to use ‘given’ instead of ‘middle’.
## It is recommended to use ‘given’ instead of ‘middle’.
##
─ checking for LF line-endings in source and make files and shell scripts
##
─ checking for empty or unneeded directories
##
─ looking to see if a ‘data/datalist’ file should be added
##
─ building ‘bbplot_0.2.tar.gz’
##
##
## Installing package into '/home/abaryiames/R/x86_64-pc-linux-gnu-library/3.5'
## (as 'lib' is unspecified)
# Load the census table used by every plot in these notes
census <- read.csv("/usr/share/data/kaggle/census.csv")

# Scatterplot: TotalPop (y) against Citizen (x)
ggplot(census, aes(x = Citizen, y = TotalPop)) +
  geom_point()
data:image/s3,"s3://crabby-images/f3c95/f3c95ad6377082dc2e099cc685d464d40e0aa047" alt=""
# Histogram of Citizen in bins of width 100.
# xlim() restricts the x scale to 0-15000; rows outside it are dropped.
ggplot(census, aes(x = Citizen)) +
  geom_histogram(binwidth = 100) +
  xlim(0, 15000)
## Warning: Removed 22 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).
data:image/s3,"s3://crabby-images/eb1d5/eb1d5b31e41b87980b437c9f478ae284b062806a" alt=""
# Logical subsetting
# Build one TRUE/FALSE flag per row: TRUE where that row's Citizen value is 0
logic <- census$Citizen == 0
# Peek at the first few flags
head(logic)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE
# Keep only the rows flagged TRUE, i.e. rows whose Citizen value is 0.
# which() drops NA flags: indexing with a raw logical vector that contains NA
# would add an all-NA row for each missing Citizen value and inflate the count.
zerocit <- census[which(logic), ]
# Number of rows whose Citizen value is 0
nrow(zerocit)
## [1] 691
# Density curve of Citizen, with the x scale restricted to 0-10000
ggplot(census, aes(x = Citizen)) +
  geom_density() +
  xlim(0, 10000)
## Warning: Removed 158 rows containing non-finite values (stat_density).
data:image/s3,"s3://crabby-images/dc9d0/dc9d0e15f94ec62cfa1a8caf086f188d43bb5103" alt=""
# Area chart of Citizen counts binned into 30 bins, x scale restricted to 0-10000
ggplot(census, aes(x = Citizen)) +
  geom_area(stat = "bin", bins = 30) +
  xlim(0, 10000)
## Warning: Removed 158 rows containing non-finite values (stat_bin).
data:image/s3,"s3://crabby-images/7e470/7e470e484bd6a228213b58ece970788e0c131402" alt=""
# Discrete variable: bar chart counting the rows for each State
ggplot(census, aes(x = State)) +
  geom_bar()
data:image/s3,"s3://crabby-images/7e753/7e753851a0735bb8ba1b63f66663d21284e33185" alt=""
# A more readable bar chart: states ordered by how often they occur
# (fct_infreq), no x-axis title, and state names rotated 90 degrees.
ggplot(census, aes(x = fct_infreq(State))) +
  geom_bar() +
  xlab("") +
  theme(axis.text.x = element_text(angle = 90))
data:image/s3,"s3://crabby-images/12770/1277046ae681f86c27890854ea3f261d4f85b8cc" alt=""
# Mean commute per state.
# Average MeanCommute within each State before plotting. Drawing the raw rows
# with stat = "identity" stacks them, so each bar would show the *sum* of all
# rows for that state (which grows with the number of rows), not a mean.
# aggregate()'s formula interface omits rows with a missing MeanCommute.
# NOTE(review): this is an unweighted mean over rows; if a population-weighted
# state mean is wanted, weight by an appropriate count column instead.
ggplot(data = aggregate(MeanCommute ~ State, data = census, FUN = mean)) +
  geom_col(aes(x = State, y = MeanCommute)) +
  theme(axis.text.x = element_text(angle = 90))
## Warning: Ignoring unknown parameters: binwidth, bins, pad
## Warning: Removed 949 rows containing missing values (position_stack).
data:image/s3,"s3://crabby-images/f9033/f9033ac7f93b8d01b2bdb178dc2881a0a6d2277f" alt=""
# Smoothed trend: Poverty against IncomePerCap.
# Both layers share the same x/y mapping, so it is declared once on the plot;
# the points are drawn nearly transparent (alpha = 0.1) under the smoother.
ggplot(census, aes(x = IncomePerCap, y = Poverty)) +
  geom_point(alpha = 0.1) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 881 rows containing non-finite values (stat_smooth).
## Warning: Removed 881 rows containing missing values (geom_point).
data:image/s3,"s3://crabby-images/503eb/503eb31ab36277e0367c964d7309da4c4f5a026c" alt=""
# Hexagonal binning of Poverty against IncomePerCap
ggplot(census, aes(x = IncomePerCap, y = Poverty)) +
  geom_hex()
## Warning: Removed 881 rows containing non-finite values (stat_binhex).
data:image/s3,"s3://crabby-images/66605/666051d462fef6f59b05bd704c1c302b962888a3" alt=""
# Contour plot: 2D density of Poverty against IncomePerCap
ggplot(census, aes(x = IncomePerCap, y = Poverty)) +
  geom_density2d()
## Warning: Removed 881 rows containing non-finite values (stat_density2d).
data:image/s3,"s3://crabby-images/510bd/510bd2878c7f8faf25292ea0edc1d31f304a3c9b" alt=""
# One discrete variable against one continuous variable:
# box plots of Poverty for each State, using only the first 10000 rows
ggplot(census[seq_len(10000), ], aes(x = State, y = Poverty)) +
  geom_boxplot()
## Warning: Removed 72 rows containing non-finite values (stat_boxplot).
data:image/s3,"s3://crabby-images/a8a81/a8a81092f071683cfcd61fb10db4f5092b41643a" alt=""
# Violin plot: distribution of Poverty for each State,
# using only the first 10000 rows
ggplot(census[seq_len(10000), ], aes(x = State, y = Poverty)) +
  geom_violin()
## Warning: Removed 72 rows containing non-finite values (stat_ydensity).
data:image/s3,"s3://crabby-images/0d2f8/0d2f8ee221ee7bad031060aedeaf9ddf891b0c92" alt=""
# Facets: Poverty against Employed for four selected states,
# one panel per State laid out side by side
ggplot(
  census[census$State %in% c("Alabama", "Oregon", "New York", "California"), ],
  aes(x = Employed, y = Poverty)
) +
  geom_point() +
  facet_grid(. ~ State) +
  theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 165 rows containing missing values (geom_point).
data:image/s3,"s3://crabby-images/2875f/2875f520fbaa6241c61097fa0419a399331c04bd" alt=""
Blog Assignment
library(ggplot2)
# Read the space-separated read-length table
newdata <- read.csv("/usr/share/data/classfiles/readlengths3.txt", sep = " ")

# Density of readlength, coloured by SampleID, with a title and axis labels
ggplot(newdata, aes(x = readlength, color = SampleID)) +
  geom_density() +
  labs(
    title = "Frequency of Sequence Lengths",
    x = "Read length, bp",
    y = "Density"
  )
data:image/s3,"s3://crabby-images/da7f9/da7f939e1e3e9bff2d6cd9670b693dbb86a19dfb" alt=""
home