calamari_ocr.ocr.dataset
.datareader.abbyy
- class calamari_ocr.ocr.dataset.datareader.abbyy.reader.Abbyy(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, images: List[str] = <factory>, xml_files: List[str] = <factory>, gt_extension: str = '.abbyy.xml', binary: bool = False, pred_extension: str = '.abbyy.pred.xml')
Bases:
calamari_ocr.ocr.dataset.datareader.base.CalamariDataGeneratorParams
- images: List[str]
- xml_files: List[str]
- gt_extension: str = '.abbyy.xml'
- binary: bool = False
- pred_extension: str = '.abbyy.pred.xml'
- select(indices: List[int])
- to_prediction()
- static cls()
- prepare_for_mode(mode: tfaip.data.pipeline.definitions.PipelineMode)
- __init__(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, images: typing.List[str] = <factory>, xml_files: typing.List[str] = <factory>, gt_extension: str = '.abbyy.xml', binary: bool = False, pred_extension: str = '.abbyy.pred.xml') -> None
- classmethod from_dict(kvs: Optional[Union[dict, list, str, int, float, bool]], *, infer_missing=False) -> dataclasses_json.api.A
- classmethod from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> dataclasses_json.api.A
- classmethod schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[dataclasses_json.mm.A]
- to_dict(encode_json=False, include_cls=True) -> Dict[str, Optional[Union[dict, list, str, int, float, bool]]]
- to_json(*, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Optional[Union[int, str]] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) -> str
.datareader.file
- class calamari_ocr.ocr.dataset.datareader.file.FileDataParams(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, images: List[str] = <factory>, texts: List[str] = <factory>, gt_extension: str = '.gt.txt', pred_extension: str = '.pred.txt')
Bases:
calamari_ocr.ocr.dataset.datareader.base.CalamariDataGeneratorParams
- images: List[str]
- texts: List[str]
- gt_extension: str = '.gt.txt'
- pred_extension: str = '.pred.txt'
- static cls()
- to_prediction()
- select(indices: List[int])
- prepare_for_mode(mode: tfaip.data.pipeline.definitions.PipelineMode)
- __init__(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, images: typing.List[str] = <factory>, texts: typing.List[str] = <factory>, gt_extension: str = '.gt.txt', pred_extension: str = '.pred.txt') -> None
- classmethod from_dict(kvs: Optional[Union[dict, list, str, int, float, bool]], *, infer_missing=False) -> dataclasses_json.api.A
- classmethod from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> dataclasses_json.api.A
- classmethod schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[dataclasses_json.mm.A]
- to_dict(encode_json=False, include_cls=True) -> Dict[str, Optional[Union[dict, list, str, int, float, bool]]]
- to_json(*, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Optional[Union[int, str]] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) -> str
.datareader.hdf5
- class calamari_ocr.ocr.dataset.datareader.hdf5.reader.Hdf5(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, files: List[str] = <factory>, pred_extension: str = '.pred.h5')
Bases:
calamari_ocr.ocr.dataset.datareader.base.CalamariDataGeneratorParams
- files: List[str]
- pred_extension: str = '.pred.h5'
- to_prediction()
- static cls()
- prepare_for_mode(mode: tfaip.data.pipeline.definitions.PipelineMode)
- __init__(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, files: typing.List[str] = <factory>, pred_extension: str = '.pred.h5') -> None
- classmethod from_dict(kvs: Optional[Union[dict, list, str, int, float, bool]], *, infer_missing=False) -> dataclasses_json.api.A
- classmethod from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> dataclasses_json.api.A
- classmethod schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[dataclasses_json.mm.A]
- to_dict(encode_json=False, include_cls=True) -> Dict[str, Optional[Union[dict, list, str, int, float, bool]]]
- to_json(*, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Optional[Union[int, str]] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) -> str
.datareader.pagexml
- class calamari_ocr.ocr.dataset.datareader.pagexml.reader.PageXML(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, images: List[str] = <factory>, xml_files: List[str] = <factory>, gt_extension: str = '.xml', text_index: int = 0, pad: Optional[List[int]] = None, pred_extension: str = '.pred.xml', skip_commented: bool = False, cut_mode: calamari_ocr.ocr.dataset.datareader.pagexml.reader.CutMode = <CutMode.POLYGON: 1>, output_confidences: bool = False, output_glyphs: bool = False, max_glyph_alternatives: int = 1, delete_old_words: bool = True)
Bases:
calamari_ocr.ocr.dataset.datareader.base.CalamariDataGeneratorParams
- images: List[str]
- xml_files: List[str]
- gt_extension: str = '.xml'
- text_index: int = 0
- pad: Optional[List[int]] = None
- pred_extension: str = '.pred.xml'
- skip_commented: bool = False
- cut_mode: calamari_ocr.ocr.dataset.datareader.pagexml.reader.CutMode = CutMode.POLYGON
- output_confidences: bool = False
- output_glyphs: bool = False
- max_glyph_alternatives: int = 1
- delete_old_words: bool = True
- select(indices: List[int])
- to_prediction()
- static cls()
- prepare_for_mode(mode: tfaip.data.pipeline.definitions.PipelineMode)
- __init__(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, images: typing.List[str] = <factory>, xml_files: typing.List[str] = <factory>, gt_extension: str = '.xml', text_index: int = 0, pad: typing.Optional[typing.List[int]] = None, pred_extension: str = '.pred.xml', skip_commented: bool = False, cut_mode: calamari_ocr.ocr.dataset.datareader.pagexml.reader.CutMode = CutMode.POLYGON, output_confidences: bool = False, output_glyphs: bool = False, max_glyph_alternatives: int = 1, delete_old_words: bool = True) -> None
- classmethod from_dict(kvs: Optional[Union[dict, list, str, int, float, bool]], *, infer_missing=False) -> dataclasses_json.api.A
- classmethod from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> dataclasses_json.api.A
- classmethod schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[dataclasses_json.mm.A]
- to_dict(encode_json=False, include_cls=True) -> Dict[str, Optional[Union[dict, list, str, int, float, bool]]]
- to_json(*, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Optional[Union[int, str]] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) -> str