## RNA-seq analysis with DESeq2 ## Stephen Turner, @genetics_blog # RNA-seq data from GSE52202 # http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=gse52202. All patients with # ALS, 4 with C9 expansion ("exp"), 4 controls without expansion ("ctl") # Import & pre-process ---------------------------------------------------- # Import data from featureCounts ## Previously ran at command line something like this: ## featureCounts -a genes.gtf -o counts.txt -T 12 -t exon -g gene_id GSM*.sam countdata <- read.table("/Users/vollmers/data/Macrophages/Illumina_lncRNA/NoStimvsPoly.txt", header=TRUE, row.names=1) # Remove .bam or .sam from filenames #colnames(countdata) <- gsub("\\.[sb]am$", "", colnames(countdata)) # Convert to matrix countdata <- as.matrix(countdata) head(countdata) # Assign condition (first four are controls, second four contain the expansion) (condition <- factor(c(rep("ctl", 2), rep("exp", 2)))) (patient <- factor(c("S1","S2","S1","S2"))) # Analysis with DESeq2 ---------------------------------------------------- library(DESeq2) # Create a coldata frame and instantiate the DESeqDataSet. See ?DESeqDataSetFromMatrix (coldata <- data.frame(row.names=colnames(countdata), condition,patient)) dds <- DESeqDataSetFromMatrix(countData=countdata, colData=coldata, design=~condition) dds # Run the DESeq pipeline dds <- DESeq(dds) # Get differential expression results res <- results(dds) table(res$padj<0.05) ## Order by adjusted p-value res <- res[order(res$padj), ] ## Merge with normalized count data resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE) names(resdata)[1] <- "Gene" head(resdata) ## Write results write.csv(resdata, file="/Users/vollmers/data/Macrophages/Illumina_lncRNA/NoStimPolyI.txt.deseq2")