Blog Notes
library(ggplot2)
library(forcats)
library(hexbin)
# Install devtools only when it is missing, so re-running these notes does
# not download and reinstall the package every time.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
## Installing package into '/home/abaryiames/R/x86_64-pc-linux-gnu-library/3.5'
## (as 'lib' is unspecified)
# bbplot is installed from the GitHub repo bbc/bbplot. Skip the install when
# the package is already available so the notes can be re-run without a
# network round trip.
if (!requireNamespace("bbplot", quietly = TRUE)) {
  devtools::install_github("bbc/bbplot")
}
## Downloading GitHub repo bbc/bbplot@master
## stringr (1.3.1 -> 1.4.0) [CRAN]
## Installing 1 packages: stringr
## Installing package into '/home/abaryiames/R/x86_64-pc-linux-gnu-library/3.5'
## (as 'lib' is unspecified)
##
checking for file ‘/tmp/RtmpP9d6IB/remotes141749c3c44b/bbc-bbplot-82af595/DESCRIPTION’ ...
✔ checking for file ‘/tmp/RtmpP9d6IB/remotes141749c3c44b/bbc-bbplot-82af595/DESCRIPTION’
##
─ preparing ‘bbplot’:
##
checking DESCRIPTION meta-information ...
─ checking DESCRIPTION meta-information ...It is recommended to use ‘given’ instead of ‘middle’.
##
It is recommended to use ‘given’ instead of ‘middle’.
## It is recommended to use ‘given’ instead of ‘middle’.
##
OK
##
It is recommended to use ‘given’ instead of ‘middle’.
##
It is recommended to use ‘given’ instead of ‘middle’.
## It is recommended to use ‘given’ instead of ‘middle’.
##
─ checking for LF line-endings in source and make files and shell scripts
##
─ checking for empty or unneeded directories
##
─ looking to see if a ‘data/datalist’ file should be added
##
─ building ‘bbplot_0.2.tar.gz’
##
##
## Installing package into '/home/abaryiames/R/x86_64-pc-linux-gnu-library/3.5'
## (as 'lib' is unspecified)
# Load the census CSV into a data frame used by the plots below.
census <- read.csv(file = "/usr/share/data/kaggle/census.csv")
# Scatterplot: Citizen on the x axis against TotalPop on the y axis
ggplot(census, aes(x = Citizen, y = TotalPop)) +
  geom_point()
# Histogram of Citizen in bins of width 100, with the x axis limited to
# the range 0 to 15000
ggplot(census, aes(x = Citizen)) +
  geom_histogram(binwidth = 100) +
  xlim(0, 15000)
## Warning: Removed 22 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).
# Logical subsetting
# Build a row mask: TRUE for every row whose Citizen value is exactly zero.
# A missing Citizen value is mapped to FALSE instead of NA, because an NA in
# the mask would make census[logic, ] return an all-NA row for it.
logic <- !is.na(census$Citizen) & census$Citizen == 0
# Peek at the first few values of the mask
head(logic)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE
# Keep only the rows of census selected by the logical mask `logic`
# (all columns are retained), then count how many rows were selected.
zerocit <- census[logic, ]
nrow(zerocit)
## [1] 691
# Density curve of Citizen, with the x axis limited to 0 to 10000
ggplot(census, aes(x = Citizen)) +
  geom_density() +
  xlim(0, 10000)
## Warning: Removed 158 rows containing non-finite values (stat_density).
# Area plot: Citizen is binned into 30 bins (stat = "bin") and the counts
# are drawn as a filled area; x axis limited to 0 to 10000
ggplot(census, aes(x = Citizen)) +
  geom_area(stat = "bin", bins = 30) +
  xlim(0, 10000)
## Warning: Removed 158 rows containing non-finite values (stat_bin).
# Discrete variable: bar chart counting the rows for each State
ggplot(census, aes(x = State)) +
  geom_bar()
# A more readable bar chart: states ordered by how often they occur
# (fct_infreq), no x-axis title, and the state labels rotated 90 degrees
ggplot(census, aes(x = fct_infreq(State))) +
  geom_bar() +
  labs(x = "") +
  theme(axis.text.x = element_text(angle = 90))
# Mean commute by state
# Average MeanCommute within each state before plotting. Drawing the raw rows
# with stat = "identity" stacks every row of a state on top of each other, so
# the bar heights would be per-state sums rather than means.
# This is an unweighted mean over the rows of each state; the formula
# interface of aggregate() drops rows whose MeanCommute is missing.
commute_by_state <- aggregate(MeanCommute ~ State, data = census, FUN = mean)
ggplot(data = commute_by_state) +
  geom_col(aes(x = State, y = MeanCommute)) +
  theme(axis.text.x = element_text(angle = 90))
## Warning: Ignoring unknown parameters: binwidth, bins, pad
## Warning: Removed 949 rows containing missing values (position_stack).
# Smoothed trend: mostly transparent points (alpha = 0.1) of Poverty against
# IncomePerCap, with a fitted smooth curve drawn over them
ggplot(census, aes(x = IncomePerCap, y = Poverty)) +
  geom_point(alpha = 0.1) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 881 rows containing non-finite values (stat_smooth).
## Warning: Removed 881 rows containing missing values (geom_point).
# Hexagonal binning of Poverty against IncomePerCap
ggplot(census, aes(x = IncomePerCap, y = Poverty)) +
  geom_hex()
## Warning: Removed 881 rows containing non-finite values (stat_binhex).
# Contour plot: 2D density contours of Poverty against IncomePerCap
ggplot(census, aes(x = IncomePerCap, y = Poverty)) +
  geom_density2d()
## Warning: Removed 881 rows containing non-finite values (stat_density2d).
# One discrete variable against one continuous variable:
# boxplot of Poverty per State, using only the first 10000 rows of census
ggplot(census[seq_len(10000), ], aes(x = State, y = Poverty)) +
  geom_boxplot()
## Warning: Removed 72 rows containing non-finite values (stat_boxplot).
# Violin plot of Poverty per State, using only the first 10000 rows of census
ggplot(census[seq_len(10000), ], aes(x = State, y = Poverty)) +
  geom_violin()
## Warning: Removed 72 rows containing non-finite values (stat_ydensity).
# Facets: restrict census to four states and draw Poverty against Employed
# in one panel per state, laid out side by side; x-axis labels rotated 90 degrees
ggplot(
  census[census$State %in% c("Alabama", "Oregon", "New York", "California"), ],
  aes(x = Employed, y = Poverty)
) +
  geom_point() +
  facet_grid(. ~ State) +
  theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 165 rows containing missing values (geom_point).
Blog Assignment
library(ggplot2)
# Load the read-length table; fields in this file are separated by single spaces.
newdata <- read.csv(
  file = "/usr/share/data/classfiles/readlengths3.txt",
  sep = " "
)
# Density curves of readlength, one coloured curve per SampleID,
# with a plot title and explicit axis labels
ggplot(newdata, aes(color = SampleID)) +
  geom_density(aes(x = readlength)) +
  labs(
    title = "Frequency of Sequence Lengths",
    x = "Read length, bp",
    y = "Density"
  )
home