Source code for drepr.models.parsers.v1.preprocessing_parser

from __future__ import annotations

from typing import Any, List, Type, Union

from drepr.models.parsers.v1.attr_parser import ParsedAttrs
from drepr.models.parsers.v1.path_parser import PathParser
from drepr.models.path import Path
from drepr.models.preprocessing import (
    PFilter,
    PMap,
    POutput,
    Preprocessing,
    PreprocessingType,
    PSplit,
    RMap,
)
from drepr.models.resource import Resource
from drepr.utils.validator import Validator


class PreprocessingParser:
    """Parser for the list of preprocessing steps (v1 format).

    Preprocessing are defined as a list

    ```
    - type: <preprocessing_type>
      # other properties
    ```

    Properties written in square brackets are optional. When `resource_id`
    is omitted, the default resource id given to :meth:`parse` is used.

    1. If <preprocessing_type> is `pmap`, its other properties are:

    ```
    - type: pmap
      [resource_id]: <resource_id>
      path: <path>
      [output]: <output> (default is None)
      [change_structure]: null|true|false (default is null)
      code: str
    ```

    2. If <preprocessing_type> is `pfilter` or `psplit`, its properties are
       (shown for `pfilter`; `psplit` takes the same properties):

    ```
    - type: pfilter
      [resource_id]: <resource_id>
      path: <path>
      [output]: <output> (default is None)
      code: str
    ```

    3. If <preprocessing_type> is `rmap`, its properties are:

    ```
    - type: rmap
      [resource_id]: <resource_id>
      path: <path>
      func_id: <func_id>
      [output]: <output> (default is None)
    ```

    where `output` is either <resource_id> (a string) or an object of

    ```
    [resource_id]: <resource_id>
    [attr]: <attr_id>
    [attr_path]: <path>
    ```
    """

    # String values of every known preprocessing type; used to validate `type`.
    PREPRO_TYPES = {x.value for x in PreprocessingType}

    def __init__(self, path_parser: PathParser):
        """Store the path parser used for the `path` and `attr_path` properties."""
        self.path_parser = path_parser

    def parse(
        self,
        default_resource_id: str,
        resources: List[Resource],
        attrs: ParsedAttrs,
        conf: list,
    ) -> List[Preprocessing]:
        """Parse a raw list of preprocessing definitions.

        Args:
            default_resource_id: resource id used for entries that do not
                declare their own `resource_id`.
            resources: known resources; the resource of each entry is looked
                up in it through ``path_parser.get_resource``.
            attrs: parsed attributes. Every non-null output attribute of a
                preprocessing step is registered on it via
                ``add_preprocessing_attr``.
            conf: the raw (deserialized) list of preprocessing definitions.

        Returns:
            One ``Preprocessing`` per entry of `conf`, in the same order.

        Raises:
            NotImplementedError: if a preprocessing type is declared in
                ``PreprocessingType`` but has no parser here.

        Malformed input is reported by the ``Validator`` / ``PathParser``
        helpers, which receive a trace string describing the position of the
        offending property.
        """
        Validator.must_be_list(conf, "Parsing preprocessing")

        result: list[Preprocessing] = []
        for i, prepro in enumerate(conf):
            trace0 = f"Parsing preprocessing at position {i}"
            Validator.must_be_dict(prepro, trace0)
            Validator.must_have(prepro, "type", trace0)
            Validator.must_in(
                prepro["type"], self.PREPRO_TYPES, f"{trace0}\nParsing property `type`"
            )
            prepro_type = PreprocessingType(prepro["type"])

            if "resource_id" in prepro:
                Validator.must_be_str(
                    prepro["resource_id"], f"{trace0}\nParsing property `resource_id`"
                )
                resource_id = prepro["resource_id"]
            else:
                resource_id = default_resource_id

            trace1 = f"{trace0}\nParsing property `path`"
            Validator.must_have(prepro, "path", trace1)
            path = self.path_parser.parse(
                self.path_parser.get_resource(resources, resource_id, trace0),
                prepro["path"],
                trace1,
            )

            if prepro_type == PreprocessingType.pmap:
                value = self.parse_pmap(resource_id, path, prepro, trace0)
            elif prepro_type == PreprocessingType.pfilter:
                value = self.parse_pfilter_psplit(
                    resource_id, path, prepro, trace0, PFilter
                )
            elif prepro_type == PreprocessingType.psplit:
                value = self.parse_pfilter_psplit(
                    resource_id, path, prepro, trace0, PSplit
                )
            elif prepro_type == PreprocessingType.rmap:
                value = self.parse_rmap(resource_id, path, prepro, trace0)
            else:
                # Only reachable when a new PreprocessingType member is added
                # without a matching parser. `NotImplemented` is a non-callable
                # singleton, so the exception class must be used here.
                raise NotImplementedError(
                    f"Not implement the parser for preprocessing function with type {prepro_type}"
                )

            # Let the attribute registry know about attributes created by
            # preprocessing outputs.
            if value.output is not None and value.output.attr is not None:
                attrs.add_preprocessing_attr(value.output.attr)

            result.append(Preprocessing(prepro_type, value))

        return result

    def _parse_optional_output(self, prepro: dict, trace0: str) -> POutput | None:
        """Parse the optional `output` property; missing or null means no output."""
        raw_output = prepro.get("output")
        if raw_output is None:
            return None
        return self.parse_output(raw_output, f"{trace0}\nParsing property `output`")

    def parse_pmap(
        self, resource_id: str, path: Path, prepro: dict, trace0: str
    ) -> PMap:
        """Parse a `pmap` definition.

        Args:
            resource_id: id of the resource the step applies to.
            path: already parsed `path` property of the step.
            prepro: the raw definition; must contain a string `code` and may
                contain `output` and a boolean `change_structure`.
            trace0: trace prefix used in validation messages.

        Returns:
            The ``PMap`` built from the definition. `output` and
            `change_structure` are ``None`` when missing or null.
        """
        trace1 = f"{trace0}\nParsing property `code`"
        Validator.must_have(prepro, "code", trace1)
        Validator.must_be_str(prepro["code"], trace1)
        code = prepro["code"]

        output = self._parse_optional_output(prepro, trace0)

        change_structure = prepro.get("change_structure")
        if change_structure is not None:
            Validator.must_be_bool(
                change_structure, f"{trace0}\nParsing property `change_structure`"
            )

        return PMap(resource_id, path, code, output, change_structure)

    def parse_pfilter_psplit(
        self,
        resource_id: str,
        path: Path,
        prepro: dict,
        trace0: str,
        cls: Union[Type[PFilter], Type[PSplit]],
    ) -> PFilter | PSplit:
        """Parse a `pfilter` or `psplit` definition (both share one shape).

        Args:
            resource_id: id of the resource the step applies to.
            path: already parsed `path` property of the step.
            prepro: the raw definition; must contain a string `code` and may
                contain `output`.
            trace0: trace prefix used in validation messages.
            cls: the class to instantiate, ``PFilter`` or ``PSplit``.

        Returns:
            An instance of `cls`. `output` is ``None`` when missing or null.
        """
        trace1 = f"{trace0}\nParsing property `code`"
        Validator.must_have(prepro, "code", trace1)
        Validator.must_be_str(prepro["code"], trace1)
        code = prepro["code"]

        output = self._parse_optional_output(prepro, trace0)
        return cls(resource_id, path, code, output)

    def parse_rmap(
        self, resource_id: str, path: Path, prepro: dict, trace0: str
    ) -> RMap:
        """Parse an `rmap` definition.

        Args:
            resource_id: id of the resource the step applies to.
            path: already parsed `path` property of the step.
            prepro: the raw definition; must contain a string `func_id` and
                may contain `output`.
            trace0: trace prefix used in validation messages.

        Returns:
            The ``RMap`` built from the definition. `output` is ``None`` when
            missing or null.
        """
        trace1 = f"{trace0}\nParsing property `func_id`"
        Validator.must_have(prepro, "func_id", trace1)
        Validator.must_be_str(prepro["func_id"], trace1)
        func_id = prepro["func_id"]

        output = self._parse_optional_output(prepro, trace0)
        return RMap(resource_id, path, func_id, output)

    def parse_output(self, output: Any, trace: str) -> POutput:
        """Parse the value of an `output` property.

        Args:
            output: either a string, taken as the output resource id, or a
                dictionary with the optional keys `resource_id`, `attr` and
                `attr_path`.
            trace: trace prefix used in validation messages.

        Returns:
            The ``POutput``; keys missing from a dictionary become ``None``.
        """
        if isinstance(output, str):
            return POutput(resource_id=output, attr=None, attr_path=None)

        Validator.must_be_dict(
            output, f"{trace}\nParsing output. Must be either string or dictionary"
        )
        # NOTE(review): presumably rejects keys outside the allowed set --
        # confirm the argument order against Validator.must_be_subset.
        Validator.must_be_subset(
            {"resource_id", "attr", "attr_path"},
            output.keys(),
            "keys of preprocessing output",
            trace,
        )

        if "resource_id" in output:
            Validator.must_be_str(
                output["resource_id"], f"{trace}\nParsing output's resource id"
            )

        attr_path = None
        if "attr_path" in output:
            # The attribute path is parsed without a resource.
            attr_path = self.path_parser.parse(
                resource=None,
                path=output["attr_path"],
                parse_trace=f"{trace}\nParsing output's attr_path",
            )

        if "attr" in output:
            Validator.must_be_str(
                output["attr"], f"{trace}\nParsing output's attribute"
            )

        return POutput(
            resource_id=output.get("resource_id"),
            attr=output.get("attr"),
            attr_path=attr_path,
        )