home

Blog Notes

library(ggplot2)
library(forcats)
library(hexbin)

# Install devtools (from CRAN) and bbplot (from GitHub) only when they are
# missing, so re-running the script does not reinstall them or need network
# access every time.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
## Installing package into '/home/abaryiames/R/x86_64-pc-linux-gnu-library/3.5'
## (as 'lib' is unspecified)
if (!requireNamespace("bbplot", quietly = TRUE)) {
  devtools::install_github("bbc/bbplot")
}
## Downloading GitHub repo bbc/bbplot@master
## stringr (1.3.1 -> 1.4.0) [CRAN]
## Installing 1 packages: stringr
## Installing package into '/home/abaryiames/R/x86_64-pc-linux-gnu-library/3.5'
## (as 'lib' is unspecified)
##   
   checking for file ‘/tmp/RtmpP9d6IB/remotes141749c3c44b/bbc-bbplot-82af595/DESCRIPTION’ ...
  
✔  checking for file ‘/tmp/RtmpP9d6IB/remotes141749c3c44b/bbc-bbplot-82af595/DESCRIPTION’
## 
  
─  preparing ‘bbplot’:
## 
  
   checking DESCRIPTION meta-information ...
  
─  checking DESCRIPTION meta-information ...It is recommended to use ‘given’ instead of ‘middle’.
## 
  
   It is recommended to use ‘given’ instead of ‘middle’.
##    It is recommended to use ‘given’ instead of ‘middle’.
## 
  
    OK
## 
  
   It is recommended to use ‘given’ instead of ‘middle’.
## 
  
   It is recommended to use ‘given’ instead of ‘middle’.
##    It is recommended to use ‘given’ instead of ‘middle’.
## 
  
─  checking for LF line-endings in source and make files and shell scripts
## 
  
─  checking for empty or unneeded directories
## 
  
─  looking to see if a ‘data/datalist’ file should be added
## 
  
─  building ‘bbplot_0.2.tar.gz’
## 
  
   
## 
## Installing package into '/home/abaryiames/R/x86_64-pc-linux-gnu-library/3.5'
## (as 'lib' is unspecified)
# Load the census CSV into a data frame.
census <- read.csv(file = "/usr/share/data/kaggle/census.csv")

# Scatterplot: Citizen on the x-axis against TotalPop on the y-axis.
ggplot(census, aes(x = Citizen, y = TotalPop)) +
  geom_point()

# Histogram of Citizen in bins of width 100; x-axis limited to 0-15000.
ggplot(census, aes(x = Citizen)) +
  geom_histogram(binwidth = 100) +
  xlim(0, 15000)
## Warning: Removed 22 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

# Logical subsetting
# Element-wise test: TRUE for each row of census whose Citizen value is 0.
logic <- census$Citizen == 0
head(logic)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE
# Keep only the rows where Citizen is 0 (a data frame, not a list).
# which() discards NA comparisons; indexing directly with a logical vector
# that contains NA would add all-NA rows and inflate the row count.
zerocit <- census[which(logic), ]
# Number of rows whose Citizen value is 0.
nrow(zerocit)
## [1] 691
# Density curve of Citizen; x-axis limited to 0-10000.
ggplot(census, aes(x = Citizen)) +
  geom_density() +
  xlim(0, 10000)
## Warning: Removed 158 rows containing non-finite values (stat_density).

# Area plot of binned Citizen counts (30 bins); x-axis limited to 0-10000.
ggplot(census, aes(x = Citizen)) +
  geom_area(stat = "bin", bins = 30) +
  xlim(0, 10000)
## Warning: Removed 158 rows containing non-finite values (stat_bin).

# Discrete variable: bar chart counting the rows for each State.
ggplot(census, aes(x = State)) +
  geom_bar()

# Better bar chart: states ordered by frequency (fct_infreq), empty x-axis
# title, and tick labels rotated 90 degrees.
ggplot(census, aes(x = fct_infreq(State))) +
  geom_bar() +
  labs(x = "") +
  theme(axis.text.x = element_text(angle = 90))

# Mean commute per state.
# Drawing every row with stat = "identity" stacks the bars, so each state
# showed the SUM of its rows' MeanCommute values rather than a mean. Average
# within each state first (unweighted mean over rows), then draw one bar per
# state with geom_col(). The formula interface of aggregate() omits rows
# whose MeanCommute is NA by default.
ggplot(data = aggregate(MeanCommute ~ State, data = census, FUN = mean)) +
  geom_col(aes(x = State, y = MeanCommute)) +
  theme(axis.text.x = element_text(angle = 90))

# Poverty against IncomePerCap: mostly transparent points with a smoothed
# trend line drawn on top.
ggplot(census, aes(x = IncomePerCap, y = Poverty)) +
  geom_point(alpha = 0.1) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 881 rows containing non-finite values (stat_smooth).
## Warning: Removed 881 rows containing missing values (geom_point).

# Hexagonal binning of Poverty against IncomePerCap.
ggplot(census, aes(x = IncomePerCap, y = Poverty)) +
  geom_hex()
## Warning: Removed 881 rows containing non-finite values (stat_binhex).

# 2D density contours of Poverty against IncomePerCap.
ggplot(census, aes(x = IncomePerCap, y = Poverty)) +
  geom_density2d()
## Warning: Removed 881 rows containing non-finite values (stat_density2d).

# One discrete by one continuous variable: boxplot of Poverty for each State,
# using only the first 10000 rows of census.
ggplot(census[seq_len(10000), ], aes(x = State, y = Poverty)) +
  geom_boxplot()
## Warning: Removed 72 rows containing non-finite values (stat_boxplot).

# Violin plot of Poverty for each State, using only the first 10000 rows of
# census.
ggplot(census[seq_len(10000), ], aes(x = State, y = Poverty)) +
  geom_violin()
## Warning: Removed 72 rows containing non-finite values (stat_ydensity).

# Facets: Poverty against Employed for four selected states, one panel per
# State, with x-axis tick labels rotated 90 degrees.
ggplot(
  census[census$State %in% c("Alabama", "Oregon", "New York", "California"), ],
  aes(x = Employed, y = Poverty)
) +
  geom_point() +
  facet_grid(. ~ State) +
  theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 165 rows containing missing values (geom_point).

Blog Assignment

library(ggplot2)
# Read the space-separated read-length table into a data frame.
newdata <- read.csv(
  file = "/usr/share/data/classfiles/readlengths3.txt",
  sep = " "
)

# Density of readlength, coloured by SampleID, with a title and axis labels.
ggplot(newdata, aes(x = readlength, color = SampleID)) +
  geom_density() +
  labs(
    title = "Frequency of Sequence Lengths",
    x = "Read length, bp",
    y = "Density"
  )

home