Commit 9327a6a3 authored by Olesya's avatar Olesya 🐋
Browse files

Initial Commit

parents
# Image used to build and run the Hobotnica R package.
# Base image: Bioconductor Docker image, release 3.13.
FROM docker.io/bioconductor/bioconductor_docker:RELEASE_3_13
# Install the R dependencies listed in install.R.
COPY install.R .
RUN Rscript install.R
# NOTE(review): `COPY hobotnica/ .` copies the *contents* of hobotnica/ into the
# working directory, so the `cd hobotnica` below only succeeds if that directory
# itself contains a nested hobotnica/ folder -- confirm against the repo layout.
COPY hobotnica/ .
# Build and install the package from its source directory.
RUN cd hobotnica && R CMD INSTALL --no-multiarch --with-keep.source .
# Start an interactive shell by default.
ENTRYPOINT ["/bin/bash"]
\ No newline at end of file
Package: Hobotnica
Type: Package
Title: Rank statistics for comparing different groups in dataset
Version: 0.1.0
Author: Who wrote it
Maintainer: Alexey Sizykh <sizykh.ad@phystech.edu>
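Description: Computes the Hobotnica score, a rank-based statistic that measures
    how well a distance matrix separates annotated groups of samples. Also
    provides helpers for generating random signatures, building score
    distributions by permuting the annotation, and plotting the results.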
License: MIT
Encoding: UTF-8
LazyData: true
Depends: doParallel, ggplot2
Imports: doParallel, ggplot2
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 4
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
AutoAppendNewline: Yes
StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
import("doParallel")
import("ggplot2")
export("Hobotnica")
export("GenerateRandomSignatures")
export("GenerateDistributionByPermutations")
export("LengthPlotter")
export("MDSPlotter")
\ No newline at end of file
# Hobotnica score of a distance matrix for a given grouping of its samples.
#
# All pairwise distances are replaced by their ranks, the ranks of the
# within-class pairs are summed, and that sum is rescaled against the smallest
# and largest values it could take for the given class sizes.
#
# Arguments:
#   distMatrix  a `dist` object or a square matrix of pairwise distances.
#   annotation  class labels, one per row of `distMatrix`; a vector, factor,
#               or a list / data frame that is flattened with `unlist()`.
#
# Returns a single number: 1 when the within-class pairs hold the smallest
# ranks, 0 when they hold the largest ones. When the score is undefined (no
# within-class pair exists, or every pair is within-class) the 0/0 result NaN
# is returned together with a warning.
Hobotnica <- function(distMatrix, annotation) {
    if (typeof(annotation) == "list") {
        annotation <- as.vector(unlist(annotation))
    } else {
        annotation <- as.vector(annotation)
    }

    rank.m <- as.matrix(distMatrix)
    if (nrow(rank.m) != ncol(rank.m)) {
        stop("distMatrix should be a square matrix, stopping.")
    }
    # A shorter annotation would otherwise be scored silently against only a
    # part of the matrix.
    if (length(annotation) != nrow(rank.m)) {
        stop("Annotation length should be equal to the size of distMatrix, stopping.")
    }

    # Rank each triangle separately; the diagonal keeps its original values.
    lower <- lower.tri(rank.m)
    upper <- upper.tri(rank.m)
    rank.m[lower] <- rank(rank.m[lower])
    rank.m[upper] <- rank(rank.m[upper])

    classes <- unique(annotation)
    Ns <- integer(length(classes))
    inclass_sum <- 0
    for (i in seq_along(classes)) {
        class_samples <- which(annotation == classes[i])
        Ns[i] <- length(class_samples)
        # Both triangles are included, so every within-class pair counts twice;
        # the bounds below are doubled in the same way.
        inclass_sum <- inclass_sum + sum(rank.m[class_samples, class_samples])
    }

    Ns_sum <- sum(Ns)
    biggest_possible_rank <- Ns_sum * (Ns_sum - 1) / 2
    number_of_unique_inclass_elements <- sum(Ns * (Ns - 1)) / 2
    maximal_value <- number_of_unique_inclass_elements *
        (2 * biggest_possible_rank - number_of_unique_inclass_elements + 1)
    minimal_value <- number_of_unique_inclass_elements *
        (1 + number_of_unique_inclass_elements)
    normalization_factor <- maximal_value - minimal_value

    if (normalization_factor == 0) {
        warning("Hobotnica score is undefined for this annotation, returning NaN.",
                call. = FALSE)
        return(NaN)
    }

    max(0, 1 - (inclass_sum - minimal_value) / normalization_factor)
}
# Distribution of Hobotnica scores under random permutations of the annotation.
#
# Arguments:
#   N           number of permutations to draw. Defaults to 100000, the count
#               that used to be hard-coded while `N` was ignored.
#   distMatrix  distance matrix passed unchanged to Hobotnica().
#   annotation  class labels. For a two-dimensional object (data frame or
#               matrix) the first column is permuted; otherwise the vector
#               itself is permuted.
#
# Returns a numeric vector with one Hobotnica score per permutation.
Hobot_distr <- function(N = 100000, distMatrix, annotation) {
    has_columns <- length(dim(annotation)) == 2

    permuted_score <- function(i) {
        sample_anno <- annotation
        if (has_columns) {
            sample_anno[, 1] <- annotation[sample.int(nrow(annotation)), 1]
        } else {
            sample_anno <- annotation[sample.int(length(annotation))]
        }
        # Hobotnica() returns a bare number, so there is no `$total` to extract.
        Hobotnica(distMatrix, sample_anno)
    }

    # vapply preallocates the result instead of growing it with c().
    vapply(seq_len(N), permuted_score, numeric(1))
}
# Fraction of the reference scores in `Hobots` that are less than or equal to
# the observed score `Test_hobot`.
# NOTE(review): Hobotnica() in this file gives higher scores to better
# separation, so an upper-tail p-value would compare with `>=` -- confirm which
# tail is intended before relying on this value.
Hobot_pval <- function(Test_hobot, Hobots) {
    not_above_test <- Hobots <= Test_hobot
    mean(not_above_test)
}
# Draw random signatures (sets of row names) from a dataset.
#
# Arguments:
#   dataset          two-dimensional object whose row names are sampled.
#   signatureLength  number of row names in every signature.
#   nSignatures      number of signatures; 0 means ten times the row count.
#   nCores           1 runs sequentially in the current process, 0 uses all
#                    detected cores minus one, any other value is the size of
#                    the fork cluster to start.
#
# Returns a list of character vectors, each of length `signatureLength`, drawn
# without replacement.
GenerateRandomSignatures <- function(dataset, signatureLength = 100, nSignatures = 0, nCores = 1) {
    if (length(dim(dataset)) != 2) {
        stop("The dataset dim length should be equal 2, stopping.")
    }
    if (signatureLength <= 0) {
        stop("Signature length should be greater than zero, stopping.")
    }
    if (signatureLength > dim(dataset)[1]) {
        stop ("signatureLength shoudld be less or equal to size of dataset, stopping.")
    }
    geneNames <- rownames(dataset)
    if (is.null(geneNames)) {
        stop("dataset should have row names to sample signatures from, stopping.")
    }
    if (nSignatures == 0) {
        nSignatures <- dim(dataset)[1] * 10
    }

    drawSignature <- function(i) {
        sample(geneNames, signatureLength, replace = FALSE)
    }

    # Sequential path: no cluster is needed for a single core, which also keeps
    # the default call usable where forking is unavailable.
    if (nCores == 1) {
        return(lapply(seq_len(nSignatures), drawSignature))
    }

    if (nCores == 0) {
        nCores <- max(1, parallel::detectCores() - 1, na.rm = TRUE)
    }
    cl <- parallel::makeForkCluster(nCores)
    # Release the workers even if sampling fails part-way through.
    on.exit(parallel::stopCluster(cl), add = TRUE)
    doParallel::registerDoParallel(cl)
    foreach (i = seq_len(nSignatures)) %dopar% {
        drawSignature(i)
    }
}
# Hobotnica scores obtained by randomly permuting the annotation.
#
# Arguments:
#   distMatrix     square matrix of pairwise distances.
#   annotation     class labels, one per column of `distMatrix`.
#   nPermutations  number of permutations; 0 means five times the matrix size.
#   nCores         1 runs sequentially, 0 uses all detected cores minus one,
#                  any other value is the size of the fork cluster to start.
#
# Returns a list with one Hobotnica score per permutation.
GenerateDistributionByPermutations <- function(distMatrix, annotation, nPermutations = 0, nCores = 1) {
    if (length(dim(distMatrix)) != 2) {
        stop("The distMatrix dim length should be equal 2, stopping.")
    }
    if (dim(distMatrix)[1] != dim(distMatrix)[2] ) {
        stop("distMatrix should be a square matrix, stopping")
    }
    # The former `length(signature)` check was dropped: this function has no
    # `signature` argument, so the test could never fail.
    if (length(annotation) <= 0) {
        stop("Annotation length should be greater than zero, stopping")
    }
    if (length(annotation) != dim(distMatrix)[2]) {
        stop("Annotation length should be equal number of columns in dataset, stopping")
    }
    if (nPermutations == 0) {
        nPermutations <- dim(distMatrix)[1] * 5
    }

    scorePermutation <- function(i) {
        permutedAnnotation <- sample(annotation, length(annotation), replace = FALSE)
        Hobotnica(distMatrix, permutedAnnotation)
    }

    if (nCores == 1) {
        return(lapply(seq_len(nPermutations), scorePermutation))
    }

    if (nCores == 0) {
        nCores <- max(1, parallel::detectCores() - 1, na.rm = TRUE)
    }
    cl <- parallel::makeForkCluster(nCores)
    # The cluster used to be left running; always shut the workers down.
    on.exit(parallel::stopCluster(cl), add = TRUE)
    doParallel::registerDoParallel(cl)
    foreach (i = seq_len(nPermutations)) %dopar% {
        scorePermutation(i)
    }
}
# Plot the Hobotnica score against signature length.
#
# For every length between `minLength` and `maxLength` the first `len` entries
# of `rangedGenes` select rows of `dataset`, `distFunction` turns them into a
# distance matrix and Hobotnica() scores that matrix against `annotation`.
#
# Arguments:
#   dataset       two-dimensional data with genes in rows.
#   annotation    class labels passed to Hobotnica().
#   rangedGenes   ordered row names or row indices of `dataset`
#                 (assumed to be sorted by importance -- TODO confirm).
#   distFunction  function producing the distance matrix for Hobotnica().
#                 NOTE(review): the default `dist` measures distances between
#                 rows; confirm whether the cut dataset should be transposed so
#                 that distances are taken between samples.
#   minLength, maxLength  range of signature lengths; `maxLength` is capped at
#                 `length(rangedGenes)`.
#   name          plot title; built from the length range when NULL.
#   nCores        1 runs sequentially, 0 uses all detected cores minus one,
#                 any other positive value is the size of the fork cluster.
#
# Returns the ggplot object created by qplot().
LengthPlotter <- function(dataset, annotation, rangedGenes, distFunction = dist, minLength = 10, maxLength = 200, name = NULL, nCores = 1) {
    if ((length(rangedGenes) != dim(dataset)[1] && length(rangedGenes) < maxLength) || length(rangedGenes) <= minLength) {
        stop("lenght of rangedGenes should be equal to number of genes in dataset or equal or greater than maxLength and greater than minLength, stopping.")
    }
    if (minLength >= maxLength) {
        stop("maxLength should be greater than minLength, stopping.")
    }
    # Zero is valid (auto-detect), matching the message and the other functions.
    if (nCores < 0) {
        stop("nCores should be greater or equal to zero, stopping.")
    }

    # rangedGenes may legitimately be shorter than maxLength when it covers the
    # whole dataset; never index past its end.
    maxLength <- min(maxLength, length(rangedGenes))

    # `name == NULL` is never TRUE or FALSE, so the old test always failed.
    if (is.null(name)) {
        name <- paste("Hobotnica", paste(minLength, maxLength, sep = ":"), sep = " ")
    }

    signatureLengths <- minLength:maxLength
    scoreForLength <- function(len) {
        datasetCut <- dataset[rangedGenes[seq_len(len)], , drop = FALSE]
        distMatrix <- distFunction(datasetCut)
        # Score the distance matrix, not the raw dataset.
        Hobotnica(distMatrix, annotation)
    }

    if (nCores == 1) {
        scores <- lapply(signatureLengths, scoreForLength)
    } else {
        if (nCores == 0) {
            nCores <- max(1, parallel::detectCores() - 1, na.rm = TRUE)
        }
        cl <- parallel::makeForkCluster(nCores)
        on.exit(parallel::stopCluster(cl), add = TRUE)
        doParallel::registerDoParallel(cl)
        scores <- foreach(len = signatureLengths) %dopar% {
            scoreForLength(len)
        }
    }

    qplot(signatureLengths, unlist(scores), main = name) +
        labs(x = "Signature length", y = "Score")
}
# Two-dimensional classical MDS plot of a distance matrix.
#
# Arguments:
#   distMatrix  a `dist` object or a square matrix of pairwise distances.
#   annotation  values used to colour the points.
#   name        plot title; "MDS Plot" when NULL.
#
# Returns the ggplot object created by qplot(), with the legend hidden.
MDSPlotter <- function(distMatrix, annotation, name = NULL) {
    # A `dist` object has no dim(), which made the square check below fail
    # with "argument is of length zero"; work on the full matrix instead.
    distMatrix <- as.matrix(distMatrix)
    if (dim(distMatrix)[1] != dim(distMatrix)[2]) {
        stop("distMatrix must be a square matrix, stopping.")
    }
    if (is.null(name)) {
        name <- "MDS Plot"
    }

    fit <- cmdscale(distMatrix, k = 2, list. = TRUE)
    x <- fit$points[, 1]
    y <- fit$points[, 2]

    qplot(x, y, xlab = "Coordinate 1", ylab = "Coordinate 2",
          main = name, colour = annotation) +
        theme(legend.position = "none",
              plot.title = element_text(hjust = 0.5, size = 16, face = "bold"))
}
\name{GenerateDistributionByPermutations}
\alias{GenerateDistributionByPermutations}
\title{Parallel generation of Hobotnica scores by permuting initial annotation of dataset.}
\usage{
GenerateDistributionByPermutations(distMatrix, annotation, nPermutations, nCores)
}
\description{
Generates Hobotnica scores by randomly permuting the annotation, given a matrix of distances, an annotation and the number of permutations to generate.
Note that by default the function runs sequentially; set nCores = 0 to use the maximal number of cores minus one, or nCores > 1 to choose the number of cores yourself.
nCores = 1 corresponds to non-parallel execution.
}
\examples{
data(iris)
distMatrix = dist(iris[, -5])
annotation = iris[, 5]
distMatrix = as.matrix(distMatrix)
GenerateDistributionByPermutations(distMatrix, annotation, 300)
}
\name{GenerateRandomSignatures}
\alias{GenerateRandomSignatures}
\title{Random signatures generation}
\usage{
GenerateRandomSignatures(dataset, signatureLength, nSignatures, nCores)
}
\description{
Generates random signatures given a dataset, the length of each signature (a single number; all generated signatures have the same length) and the number of signatures.
}
\examples{
data(iris)
dataset = iris[, -5]
GenerateRandomSignatures(dataset, 100, 200)
}
\name{Hobotnica}
\alias{Hobotnica}
\title{Hobotnica algorithm}
\usage{
Hobotnica(distMatrix, annotation)
}
\description{
Calculates the Hobotnica score for a distance matrix, given a grouping (annotation) of its samples.
}
\examples{
data(iris)
distMatrix = dist(iris[, -5])
annotation = iris[, 5]
Hobotnica(distMatrix, annotation)
}
# Install the CRAN packages together with their dependencies.
cran_packages <- c('doParallel', 'ggplot2')
install.packages(cran_packages, dependencies = TRUE)

# BiocManager is only installed when it is not already available.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
}

# Install the Bioconductor packages one at a time, in the original order.
for (bioc_package in c("DESeq2", "tximeta")) {
    BiocManager::install(bioc_package)
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment