calamari_ocr.ocr.dataset

.datareader.abbyy

class calamari_ocr.ocr.dataset.datareader.abbyy.reader.Abbyy(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, images: List[str] = <factory>, xml_files: List[str] = <factory>, gt_extension: str = '.abbyy.xml', binary: bool = False, pred_extension: str = '.abbyy.pred.xml')

Bases: calamari_ocr.ocr.dataset.datareader.base.CalamariDataGeneratorParams

images: List[str]
xml_files: List[str]
gt_extension: str = '.abbyy.xml'
binary: bool = False
pred_extension: str = '.abbyy.pred.xml'
select(indices: List[int])
to_prediction()
static cls()
prepare_for_mode(mode: tfaip.data.pipeline.definitions.PipelineMode)
__init__(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, images: typing.List[str] = <factory>, xml_files: typing.List[str] = <factory>, gt_extension: str = '.abbyy.xml', binary: bool = False, pred_extension: str = '.abbyy.pred.xml') -> None
classmethod from_dict(kvs: Optional[Union[dict, list, str, int, float, bool]], *, infer_missing=False) -> dataclasses_json.api.A
classmethod from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> dataclasses_json.api.A
classmethod schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[dataclasses_json.mm.A]
to_dict(encode_json=False, include_cls=True) -> Dict[str, Optional[Union[dict, list, str, int, float, bool]]]
to_json(*, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Optional[Union[int, str]] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) -> str

.datareader.file

class calamari_ocr.ocr.dataset.datareader.file.FileDataParams(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, images: List[str] = <factory>, texts: List[str] = <factory>, gt_extension: str = '.gt.txt', pred_extension: str = '.pred.txt')

Bases: calamari_ocr.ocr.dataset.datareader.base.CalamariDataGeneratorParams

images: List[str]
texts: List[str]
gt_extension: str = '.gt.txt'
pred_extension: str = '.pred.txt'
static cls()
to_prediction()
select(indices: List[int])
prepare_for_mode(mode: tfaip.data.pipeline.definitions.PipelineMode)
__init__(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, images: typing.List[str] = <factory>, texts: typing.List[str] = <factory>, gt_extension: str = '.gt.txt', pred_extension: str = '.pred.txt') -> None
classmethod from_dict(kvs: Optional[Union[dict, list, str, int, float, bool]], *, infer_missing=False) -> dataclasses_json.api.A
classmethod from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> dataclasses_json.api.A
classmethod schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[dataclasses_json.mm.A]
to_dict(encode_json=False, include_cls=True) -> Dict[str, Optional[Union[dict, list, str, int, float, bool]]]
to_json(*, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Optional[Union[int, str]] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) -> str

.datareader.hdf5

class calamari_ocr.ocr.dataset.datareader.hdf5.reader.Hdf5(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, files: List[str] = <factory>, pred_extension: str = '.pred.h5')

Bases: calamari_ocr.ocr.dataset.datareader.base.CalamariDataGeneratorParams

files: List[str]
pred_extension: str = '.pred.h5'
to_prediction()
static cls()
prepare_for_mode(mode: tfaip.data.pipeline.definitions.PipelineMode)
__init__(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, files: typing.List[str] = <factory>, pred_extension: str = '.pred.h5') -> None
classmethod from_dict(kvs: Optional[Union[dict, list, str, int, float, bool]], *, infer_missing=False) -> dataclasses_json.api.A
classmethod from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> dataclasses_json.api.A
classmethod schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[dataclasses_json.mm.A]
to_dict(encode_json=False, include_cls=True) -> Dict[str, Optional[Union[dict, list, str, int, float, bool]]]
to_json(*, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Optional[Union[int, str]] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) -> str

.datareader.pagexml

class calamari_ocr.ocr.dataset.datareader.pagexml.reader.PageXML(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, images: List[str] = <factory>, xml_files: List[str] = <factory>, gt_extension: str = '.xml', text_index: int = 0, pad: Optional[List[int]] = None, pred_extension: str = '.pred.xml', skip_commented: bool = False, cut_mode: calamari_ocr.ocr.dataset.datareader.pagexml.reader.CutMode = <CutMode.POLYGON: 1>, output_confidences: bool = False, output_glyphs: bool = False, max_glyph_alternatives: int = 1)

Bases: calamari_ocr.ocr.dataset.datareader.base.CalamariDataGeneratorParams

images: List[str]
xml_files: List[str]
gt_extension: str = '.xml'
text_index: int = 0
pad: Optional[List[int]] = None
pred_extension: str = '.pred.xml'
skip_commented: bool = False
cut_mode: calamari_ocr.ocr.dataset.datareader.pagexml.reader.CutMode = CutMode.POLYGON
output_confidences: bool = False
output_glyphs: bool = False
max_glyph_alternatives: int = 1
select(indices: List[int])
to_prediction()
static cls()
prepare_for_mode(mode: tfaip.data.pipeline.definitions.PipelineMode)
__init__(channels: int = 1, to_gray_method: str = 'cv', skip_invalid: bool = True, non_existing_as_empty: bool = False, n_folds: int = -1, preload: bool = True, images: typing.List[str] = <factory>, xml_files: typing.List[str] = <factory>, gt_extension: str = '.xml', text_index: int = 0, pad: typing.Optional[typing.List[int]] = None, pred_extension: str = '.pred.xml', skip_commented: bool = False, cut_mode: calamari_ocr.ocr.dataset.datareader.pagexml.reader.CutMode = CutMode.POLYGON, output_confidences: bool = False, output_glyphs: bool = False, max_glyph_alternatives: int = 1) -> None
classmethod from_dict(kvs: Optional[Union[dict, list, str, int, float, bool]], *, infer_missing=False) -> dataclasses_json.api.A
classmethod from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> dataclasses_json.api.A
classmethod schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[dataclasses_json.mm.A]
to_dict(encode_json=False, include_cls=True) -> Dict[str, Optional[Union[dict, list, str, int, float, bool]]]
to_json(*, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Optional[Union[int, str]] = None, separators: Optional[Tuple[str, str]] = None, default: Optional[Callable] = None, sort_keys: bool = False, **kw) -> str