fetchSequenceInfo.R 2.24 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
### =========================================================================
### fetchSequenceInfo()
### -------------------------------------------------------------------------


.fetch_sequence_info_for_UCSC_genome <- function(genome,
    goldenPath_url="http://hgdownload.cse.ucsc.edu/goldenPath")
{
    ext_chrominfo <- fetchExtendedChromInfoFromUCSC(genome,
                         goldenPath_url=goldenPath_url, quiet=TRUE)
    data.frame(seqnames=ext_chrominfo[ , "UCSC_seqlevel"],
               seqlengths=ext_chrominfo[ , "UCSC_seqlength"],
               is_circular=ext_chrominfo[ , "circular"],
               genome=genome,
               stringsAsFactors=FALSE)
}

#.fetch_sequence_info_for_NCBI_genome <- function(refseq_assembly_accession,
#                                                 AssemblyUnits,
#                                                 circ_seqs)
#{
#    assembly_report <- fetch_assembly_report(refseq_assembly_accession,
#                                             AssemblyUnits=AssemblyUnits)
#    ans_seqnames <- as.character(assembly_report[ , "SequenceName"])
#
#}
#
#SUPPORTED_NCBI_GENOMES <- list(
#    GRCh38=
#        list(refseq_assembly_accession="GCF_000001405.26", circular="MT")
#)

### Returns a data frame.
### Only supports UCSC genomes for now (the same genomes that are supported
### by fetchExtendedChromInfoFromUCSC()).
### NOT exported.
fetchSequenceInfo <- function(genome)
{
    if (!isSingleString(genome) || genome == "")
        stop("'genome' must be a single non-empty string")
    idx <- match(genome, names(SUPPORTED_UCSC_GENOMES))
    if (!is.na(idx))
        return(.fetch_sequence_info_for_UCSC_genome(genome))
    #idx <- match(genome, names(SUPPORTED_NCBI_GENOMES))
    #if (!is.na(idx)) {
    #    supported_genome <- SUPPORTED_NCBI_GENOMES[[idx]]
    #    refseq_assembly_accession <- supported_genome$refseq_assembly_accession
    #    AssemblyUnits <- supported_genome$AssemblyUnits
    #    circ_seqs <- supported_genome$circular
    #    return(.fetch_sequence_info_for_NCBI_genome(refseq_assembly_accession,
    #                                                AssemblyUnits,
    #                                                circ_seqs))
    #}
    stop("genome \"", genome, "\" is not supported")
}