Source code for gmql.dataset.parsers.Parsers

from .RegionParser import RegionParser
from . import COORDS_DEFAULT
from . import TAB, VCF


class BedParser(RegionParser):
    """Standard full BED parser (10 tab-separated columns).

    Chromosome, start and stop are read from columns 0, 1 and 2, and the
    strand from column 5. The remaining columns (3, 4, 6, 7, 8 and 9) are
    declared as extra fields named after their own column index.
    """

    def __init__(self):
        # Each entry is (column index, field name, field type); only
        # column 3 is textual, every other extra column is a double.
        extra_columns = [
            (3, "3", "string"),
            (4, "4", "double"),
            (6, "6", "double"),
            (7, "7", "double"),
            (8, "8", "double"),
            (9, "9", "double"),
        ]
        super().__init__(
            chrPos=0,
            startPos=1,
            stopPos=2,
            strandPos=5,
            otherPos=extra_columns,
            delimiter="\t",
            coordinate_system=COORDS_DEFAULT,
            schema_format=TAB,
        )
class ANNParser(RegionParser):
    """Annotation parser (6 tab-separated columns).

    Column layout: chromosome (0), start (1), stop (2), name (3),
    score (4) and strand (5).
    """

    def __init__(self):
        # Extra fields as (column index, field name, field type).
        name_column = (3, "name", "string")
        score_column = (4, "score", "double")
        super().__init__(
            chrPos=0,
            startPos=1,
            stopPos=2,
            strandPos=5,
            otherPos=[name_column, score_column],
            delimiter="\t",
            coordinate_system=COORDS_DEFAULT,
            schema_format=TAB,
        )
class BroadProjParser(RegionParser):
    """Parser for 4 tab-separated columns without strand information.

    Column layout: chromosome (0), start (1), stop (2) and name (3).
    """

    def __init__(self):
        # The only extra field is the textual name in column 3.
        extra_columns = [(3, "name", "string")]
        super().__init__(
            chrPos=0,
            startPos=1,
            stopPos=2,
            strandPos=None,
            otherPos=extra_columns,
            delimiter="\t",
            coordinate_system=COORDS_DEFAULT,
            schema_format=TAB,
        )
class BasicParser(RegionParser):
    """Minimal parser reading only chromosome, start and stop.

    The three coordinates come from columns 0, 1 and 2 of a tab-separated
    file; no strand column and no extra fields are declared.
    """

    def __init__(self):
        # Coordinates only: strand and extra fields are explicitly absent.
        coordinates = dict(chrPos=0, startPos=1, stopPos=2)
        super().__init__(
            strandPos=None,
            otherPos=None,
            delimiter="\t",
            coordinate_system=COORDS_DEFAULT,
            schema_format=TAB,
            **coordinates,
        )
class NarrowPeakParser(RegionParser):
    """Narrow peaks parser (10 tab-separated columns).

    Column layout: chromosome (0), start (1), stop (2), name (3),
    score (4), strand (5), signalValue (6), pValue (7), qValue (8)
    and peak (9).
    """

    def __init__(self):
        # Extra fields as (column index, field name, field type); the
        # strand column (5) is passed separately through ``strandPos``.
        extra_columns = [
            (3, "name", "string"),
            (4, "score", "double"),
            (6, "signalValue", "double"),
            (7, "pValue", "double"),
            (8, "qValue", "double"),
            (9, "peak", "double"),
        ]
        super().__init__(
            chrPos=0,
            startPos=1,
            stopPos=2,
            strandPos=5,
            otherPos=extra_columns,
            delimiter="\t",
            coordinate_system=COORDS_DEFAULT,
            schema_format=TAB,
        )
class BroadPeakParser(RegionParser):
    """Broad peaks parser (9 tab-separated columns).

    Column layout: chromosome (0), start (1), stop (2), name (3),
    score (4), strand (5), signalValue (6), pValue (7) and qValue (8).
    Unlike the narrow peak layout there is no trailing ``peak`` column,
    so the parser reads 9 columns, not 10.
    """

    def __init__(self):
        # Extra fields as (column index, field name, field type); the
        # strand column (5) is passed separately through ``strandPos``.
        extra_columns = [
            (3, "name", "string"),
            (4, "score", "double"),
            (6, "signalValue", "double"),
            (7, "pValue", "double"),
            (8, "qValue", "double"),
        ]
        super().__init__(
            chrPos=0,
            startPos=1,
            stopPos=2,
            strandPos=5,
            otherPos=extra_columns,
            delimiter="\t",
            coordinate_system=COORDS_DEFAULT,
            schema_format=TAB,
        )
class RnaSeqParser(RegionParser):
    """RNA-seq region parser (6 tab-separated columns).

    Column layout: chromosome (0), start (1), stop (2), strand (3),
    name (4) and score (5). Note that the strand sits in column 3 here,
    not in column 5 as in the BED-style parsers.
    """

    def __init__(self):
        # Extra fields as (column index, field name, field type); they
        # follow the strand column rather than preceding it.
        extra_columns = [
            (4, "name", "string"),
            (5, "score", "double"),
        ]
        super().__init__(
            chrPos=0,
            startPos=1,
            stopPos=2,
            strandPos=3,
            otherPos=extra_columns,
            delimiter="\t",
            coordinate_system=COORDS_DEFAULT,
            schema_format=TAB,
        )
class BedScoreParser(RegionParser):
    """BED parser with a single score column (4 tab-separated columns).

    Column layout: chromosome (0), start (1), stop (2) and score (3).
    No strand column is declared.
    """

    def __init__(self):
        # The only extra field is the numeric score in column 3.
        extra_columns = [(3, "score", "double")]
        super().__init__(
            chrPos=0,
            startPos=1,
            stopPos=2,
            strandPos=None,
            otherPos=extra_columns,
            delimiter="\t",
            coordinate_system=COORDS_DEFAULT,
            schema_format=TAB,
        )
class VCFParser(RegionParser):
    """Parser for VCF files.

    The chromosome is read from column 0, and both start and stop are
    read from column 1. Columns 2 to 8 are declared as the extra fields
    ID, REF, ALT, QUAL, FILTER, INFO and FORMAT. ``strandPos`` is not
    passed, so whatever default ``RegionParser`` defines applies.
    """

    def __init__(self):
        # Extra fields as (column index, field name, field type).
        # NOTE(review): QUAL is typed "float" while every other parser in
        # this module uses "double" for numeric columns -- confirm that
        # RegionParser accepts both spellings.
        extra_columns = [
            (2, "ID", "string"),
            (3, "REF", "string"),
            (4, "ALT", "string"),
            (5, "QUAL", "float"),
            (6, "FILTER", "string"),
            (7, "INFO", "string"),
            (8, "FORMAT", "string"),
        ]
        super().__init__(
            chrPos=0,
            startPos=1,
            stopPos=1,  # same column as start
            otherPos=extra_columns,
            delimiter="\t",
            coordinate_system=COORDS_DEFAULT,
            schema_format=VCF,
        )