Zone handling tools. More...

Topics
	Table Recognition Module
	Table detection and working with tables.

Classes
struct	_ZONE
	ZONE structure. More...

struct	ZONEDATA
	ZONEDATA structure. More...

Macros
#define	MAX_ZONE_LANGUAGES 2
	Length of languages array in a zone.

Typedefs
typedef struct _ZONE	ZONE
	ZONE structure.

typedef ZONE *	LPZONE
	Pointer to a structure ZONE.

typedef const ZONE *	LPCZONE
	Const pointer to a structure ZONE.

typedef ZONEDATA *	LPZONEDATA
	Pointer to a structure ZONEDATA.

typedef const ZONEDATA *	LPCZONEDATA
	Const pointer to a structure ZONEDATA.

Enumerations
enum	FILLINGMETHOD { FM_DEFAULT = 0 , FM_OMNIFONT , FM_DRAFTDOT9 , FM_BARCODE , FM_OMR , FM_HANDPRINT , FM_BRAILLE , FM_DRAFTDOT24 , FM_OCRA , FM_OCRB , FM_MICR , FM_BARCODE2D , FM_DOTDIGIT , FM_DASHDIGIT , FM_RESERVED_2 , FM_CMC7 , FM_NO_OCR , FM_AUTO_HAND , FM_AUTO_BAR , FM_MRZ , FM_SIZE }
	Filling methods. More...

enum	RECOGNITIONMODULE { RM_AUTO = 0 , RM_OMNIFONT_MTX , RM_OMNIFONT_MOR , RM_DOT , RM_BAR , RM_OMR , RM_HNR , RM_RER , RM_BRA , RM_MAT , RM_RESERVED_P , RM_OMNIFONT_PLUS2W , RM_OMNIFONT_FRX , RM_OMNIFONT_PLUS3W , RM_ASIAN , RM_RESERVED_M , RM_RESERVED_A , RM_ARABIC , RM_SIZE }
	Recognition modules (Engines) More...

enum	ZONETYPE { WT_FLOW , WT_TABLE , WT_GRAPHIC , WT_AUTO , WT_IGNORE , WT_FORM , WT_VERTTEXT , WT_LEFTTEXT , WT_RIGHTTEXT }
	Basic zone types. More...

enum	IMG_DECOMP { DCM_AUTO , DCM_LEGACY , DCM_STANDARD , DCM_FAST }
	Page parse method. More...

enum	ZONE_BORDER_TYPE { ZBT_PURE_TEXT , ZBT_FRAMED_PURE_TEXT , ZBT_SHORT_COMB , ZBT_FRAMED_SHORT_COMB , ZBT_INDIVIDUAL_BOXES , ZBT_FRAMED_FULL_COMB , ZBT_UNDERLINED_TEXT , ZBT_GRAY_BOXES , ZBT_SIZE }
	Zone border types. More...

enum	LOCATEHANDWRITINGMODE { LH_RETAIN_HANDWRITING , LH_REMOVE_HANDWRITING }
	kRecLocateHandwriting modes More...

enum	CREATEHANDWRINGPAGEMODE { CHP_FIND_HANDWRITING , CHP_SEPARATE_HANDWRITING }
	kRecCreateHandwritingPage modes More...

Functions
RECERR RECAPIKRN	kRecSetDecompMethod (int sid, IMG_DECOMP Algorithm)
	Setting the decomp method.

RECERR RECAPIKRN	kRecGetDecompMethod (int sid, IMG_DECOMP *pAlgorithm)
	Getting the decomp method.

RECERR RECAPIKRN	kRecSetNongriddedTableDetect (int sid, INTBOOL bEnable)
	Setting the non-gridded table detection.

RECERR RECAPIKRN	kRecGetNongriddedTableDetect (int sid, INTBOOL *bEnable)
	Getting the non-gridded table detection setting.

RECERR RECAPIKRN	kRecSetForceSingleColumn (int sid, INTBOOL bForceSingle)
	Specify the Force Single Column mode.

RECERR RECAPIKRN	kRecGetForceSingleColumn (int sid, INTBOOL *pbForceSingle)
	Getting the setting of Force Single Column mode.

RECERR RECAPIKRN	kRecLocateZones (int sid, HPAGE hPage)
	Page parsing.

RECERR RECAPIKRN	kRecLocateZonesEx (int sid, HPAGE hPage, int fm_mode)
	Extended page parsing.

RECERR RECAPIKRN	kRecSetPageDescription (int sid, DWORD PageDesc)
	Setting page description data.

RECERR RECAPIKRN	kRecGetPageDescription (int sid, DWORD *pPageDesc)
	Getting page description data.

RECERR RECAPIKRN	kRecGetZoneCount (HPAGE hPage, int *pnZones)
	Getting the user zone count.

RECERR RECAPIKRN	kRecGetZoneInfo (HPAGE hPage, IMAGEINDEX iiImg, LPZONE pZone, int nZone)
	Getting user zone information.

RECERR RECAPIKRN	kRecGetZoneLayout (HPAGE hPage, IMAGEINDEX iiImg, LPRECT *ppRects, int *pnRects, int iZone)
	Getting user zone shape information.

RECERR RECAPIKRN	kRecGetZoneNodeArray (HPAGE hPage, IMAGEINDEX iiImg, LPPOINT *ppPoints, int *pnNodes, int iZone)
	Getting the polygon of the user zone.

RECERR RECAPIKRN	kRecDeleteAllZones (HPAGE hPage)
	Deleting all user zones.

RECERR RECAPIKRN	kRecDeleteZone (HPAGE hPage, int nZone)
	Deleting a user zone.

void RECAPIKRN	kRecInitZone (LPZONE pZone)
	Initializing a ZONE variable.

RECERR RECAPIKRN	kRecInsertZone (HPAGE hPage, IMAGEINDEX iiImg, LPCZONE pZone, int nZone)
	Inserting a user zone.

RECERR RECAPIKRN	kRecAddZoneRect (HPAGE hPage, IMAGEINDEX iiImg, const RECT *pRect, int nZone)
	Adding a rectangle to a user zone.

RECERR RECAPIKRN	kRecSubZoneRect (HPAGE hPage, IMAGEINDEX iiImg, const RECT *pRect, int nZone)
	Subtracting a rectangle from a user zone.

RECERR RECAPIKRN	kRecSetZoneBorder (HPAGE hPage, int nZone, ZONE_BORDER_TYPE ZoneBorderType, int CellCount)
	Setting the border type of a zone.

RECERR RECAPIKRN	kRecGetZoneBorder (HPAGE hPage, int nZone, ZONE_BORDER_TYPE *pZoneBorderType, int *pCellCount)
	Getting the border type of zone.

RECERR RECAPIKRN	kRecCopyOCRZones (HPAGE hPage)
	Copying the OCR zone list to a user zone list.

RECERR RECAPIKRN	kRecLoadZones (HPAGE hPage, LPCTSTR pFileName)
	Loading user zones.

RECERR RECAPIKRN	kRecSaveZones (HPAGE hPage, LPCTSTR pFileName)
	Saving the user zone list.

RECERR RECAPIKRN	kRecUpdateZone (HPAGE hPage, IMAGEINDEX iiImg, LPCZONE pZone, int nZone)
	Updating a user zone.

RECERR RECAPIKRN	kRecSetZoneLayout (HPAGE hPage, IMAGEINDEX iiImg, LPCRECT pRects, int nRects, int nZone)
	Updating the user zone shape information.

RECERR RECAPIKRN	kRecGetOCRZoneCount (HPAGE hPage, int *pnOCRZones)
	Getting the OCR zone count.

RECERR RECAPIKRN	kRecGetOCRZoneInfo (HPAGE hPage, IMAGEINDEX iiImg, LPZONE pOCRZone, int nOCRZone)
	Getting OCR zone information.

RECERR RECAPIKRN	kRecGetOCRZoneData (HPAGE hPage, IMAGEINDEX iiImg, LPZONEDATA pOCRZoneData, int nOCRZone)
	Getting additional information about OCR zones.

RECERR RECAPIKRN	kRecGetOCRZoneLayout (HPAGE hPage, IMAGEINDEX iiImg, LPRECT *ppRects, int *pnRects, int nZone)
	Getting OCR zone shape information.

RECERR RECAPIKRN	kRecGetOCRZoneNodeArray (HPAGE hPage, IMAGEINDEX iiImg, LPPOINT *ppPoints, int *pnNodes, int iZone)
	Getting the polygon of the OCR zone.

RECERR RECAPIKRN	kRecSaveOCRZones (HPAGE hPage, LPCTSTR pFileName)
	Saving the OCR zone list.

RECERR RECAPIKRN	kRecUpdateOCRZone (HPAGE hPage, IMAGEINDEX iiImg, LPCZONE pZone, int nZone)
	Updating the OCR zone.

RECERR RECAPIKRN	kRecLocateHandwriting (int sid, HPAGE hPage, LOCATEHANDWRITINGMODE mode, LPCRECT pRect)
	Find/remove handwriting on/from the given image.

RECERR RECAPIKRN	kRecCreateHandwritingPage (int sid, HPAGE hPage, HPAGE *phPageOut, CREATEHANDWRINGPAGEMODE mode, LPCRECT pRect)
	Find/separate handwriting on the given image.

Bitmasks of checking control
Defining spell checking behavior by zones. See ZONE::chk_control.
#define	CHK_LANGDICT_PROHIBIT 0x00000001
	Prohibit the use of the Language dictionary.

#define	CHK_USERDICT_PROHIBIT 0x00000002
	Prohibit the use of the user dictionary.

#define	CHK_CHECKCBF_PROHIBIT 0x00000004
	Deprecated.

#define	CHK_VERTDICT_PROHIBIT 0x00000008
	Prohibit the use of the Vertical dictionary.

#define	CHK_IGNORE_WHITESPACE 0x00000010
	Ignore white space characters (SPACE and TAB characters) during checking. This field should be used together with the `CHK_PASS_LINES` flag.

#define	CHK_IGNORE_CASE 0x00000020
	Case insensitive UD-checking.

#define	CHK_PASS_LINES 0x00000040
	Instructs the selected RECOGNITIONMODULE to pass entire lines to the checker, instead of words. Do not use this attribute in conjunction with spell checking.

#define	CHK_CORRECTION_DISABLED 0x00000080
	Retained only for compatibility.

#define	CHK_INCLUDE_PUNCTUATION 0x00000100
	Checking will consider punctuation characters on the boundaries of the strings as well.

#define	CHK_CORRECT_PROPERNAMES 0x00000200
	Retained only for compatibility.

#define	CHK_LANGDICT_USED 0x00010000
	"After recognition flag": the Language dictionary was enabled during the checking process (spell checking was activated for the zone).

#define	CHK_USERDICT_USED 0x00020000
	"After recognition flag": the user dictionary was enabled during the checking process (UD-checking was activated for the zone).

#define	CHK_CHECKCBF_USED 0x00040000
	Deprecated.

#define	CHK_VERTDICT_USED 0x00080000
	"After recognition flag": a Vertical dictionary was enabled during the checking process.

Filling modes for extended page parsing
These flags describe the different filling modes possibly used on the page for helping the automatic zone locating process. These flags are combinable and to be passed to the function kRecLocateZonesEx.
#define	LZX_OMNIFONT 1
	Find machine print zones. See also FM_OMNIFONT.

#define	LZX_HANDPRINT 2
	Find handprint zones. See also FM_HANDPRINT.

#define	LZX_BARCODE 4
	Find 1D and 2D barcode zones. See also FM_BARCODE.

#define	LZX_MRZ 8
	Find MRZ zones. See also FM_MRZ.

Page Descriptor defines
Defining behavior of auto-zoning outside user zones. See the usage of page descriptor.
#define	LZ_COLUMN_MASK 0x000000ff
	This can be used for masking the `LZ_COLUMN` flag.

#define	LZ_COLUMN_NO 0x00000001
	This does not find text zones on the page.

#define	LZ_COLUMN_ONE 0x00000002
	The page contains one column (single column mode).

#define	LZ_COLUMN_AUTO 0x00000004
	This finds text zones on the page automatically.

#define	LZ_COLUMN_FIND 0x00000008
	Internal use only.

#define	LZ_TABLE_MASK 0x0000ff00
	This can be used for masking the `LZ_TABLE` flag.

#define	LZ_TABLE_NO 0x00000100
	This does not find tables on the page.

#define	LZ_TABLE_ONE 0x00000200
	The whole page is one table.

#define	LZ_TABLE_AUTO 0x00000400
	This finds table zones automatically.

#define	LZ_GRAPHICS_MASK 0x00ff0000
	This can be used for masking the `LZ_GRAPHICS` flag.

#define	LZ_GRAPHICS_NO 0x00010000
	This does not find graphics on the page.

#define	LZ_GRAPHICS_ONE 0x00020000
	The whole page is one graphic.

#define	LZ_GRAPHICS_AUTO 0x00040000
	This finds graphic zones automatically.

#define	LZ_FORM 0x01000000

#define	LZ_FREEFORM 0x02000000
	This can be used for recognition of free forms. This is when a page contains a filled, gridded form, and the best possible OCR accuracy is desired, without creating formatted output. In this case the gridded form is decomposed into smaller text zones optimized for OCR. The zones are not sorted by reading order. It cannot be combined with LZ_FORM. It is used only by DCM_STANDARD method.

Detailed Description

Zone handling tools.

The zone is a rectangular area or the union of specifically located rectangular areas in the page. The upper limit of its dimensions is full page size. It also contains a feature of interest to the user.

The union of rectangles must have a so-called pizzabox shape: the top of each rectangle in the union must touch the bottom of the upper rectangle (i.e. the bottom of the upper one and the top of the lower one are exactly the same). A rectangle can touch at most one rectangle above and one below.
Zones that cannot have a pizzabox shape include:

Table zones - they must be rectangular,
Vertical text zones for CCJK or Western languages - they must be rectangular,
OMR zones (see FM_OMR, RM_OMR) - these zones may also contain non-touching rectangles.

A pizzabox-shaped zone is a compound and irregular zone.
The image data covered by each zone is handled and processed (typically recognized) separately, according to zone-specific parameters.

NOTE: In both the SDK and its documentation coordinates refer to grid-coordinates - i.e. the top or left borders of pixels. Thus a rectangle does not contain the pixels according to its right and bottom coordinates.

Any HPAGE can contain two types of zones in zone lists:

user zones and
OCR zones.

The user zones are defined by the User. The OCR zones are created by the page parser process, which detects the OCR zones and fills the OCR zone list. When there are user zones, the page parser creates one or more OCR zones from each user zone and it may process the area outside of user zones, as controlled by the page descriptor (see below).

IMPORTANT: The CSDK does not support overlapping non-graphical user zones. A graphical user zone (WT_GRAPHIC) can overlap non-graphical ones. Furthermore, the auto zoning algorithm may create graphical OCR zones overlapping non-graphical ones.

The type of an OCR zone can never be WT_AUTO or WT_IGNORE. The created OCR zones always inherit the attributes (e.g.: filter, filling method, etc.; see ZONE) of the user zone inside which they were created. If an OCR zone is created outside user zones, its attributes will be set to default for filling method, recognition module, filters and spell checking related properties.

The recognition process (kRecRecognize) works on OCR zones.

The number of zones in the zone lists can be queried at any time using the functions kRecGetZoneCount and kRecGetOCRZoneCount. All functions that use an index to determine the zone to be queried or modified may receive the index -1. This refers to the last zone in the given zone list. Exception: kRecInsertZone : Assign the value -1 to have the new zone inserted at the end of the zone list. From then on the value -1 refers to this inserted zone.

Zones can be added to the appropriate zone list of any given HPAGE in three different ways:

add zones automatically (auto-zoning) - use it to add OCR zones
add zones manually (by specifying the zone coordinates and attributes) - use it to add user zones, or
add zones from a zone file (a storage file) - use it to add user zones.

Auto-zoning

Automatic page-layout decomposition process (auto-zoning) can be activated directly by calling the kRecLocateZones function for finding text blocks on the image. It creates an entire OCR zone list for the given HPAGE.

OmniPage Capture SDK v2025.1 offers three different algorithms to be applied during auto-zoning: use the kRecSetDecompMethod function to specify the Page parser algorithm to be applied during auto-zoning. For details, see also IMG_DECOMP.

When you use auto-zoning, each resulting zone is initialized with

the zone's detected coordinates,
the determined zone type (any ZONETYPE except WT_AUTO and WT_IGNORE).

All zones created by this function will have

the default filling method FM_DEFAULT,
the automatic recognition module selection RM_AUTO setting,
the FILTER_DEFAULT Character Set filter, to specify the use of global filtering, if any; and
none of the checking prohibit flags (e.g. CHK_LANGDICT_PROHIBIT) set. NOTE: The only exception from the above is when creating OCR zones from user zones that were added before auto-zoning. In this case, OCR zones inherit the attributes of the corresponding user zones.

If auto-zoning uses the method DCM_STANDARD, the process will also attempt finding horizontal and vertical rule lines. If there are user zones, auto-zoning searches for rule lines in WT_AUTO and WT_TABLE zones and also outside the user zones - when the page descriptor allows it (see Page Descriptor defines). If there are no user zones, rule line detection is performed. Rule lines are stored in the page, in a line list. The recognition process (PID_RECOGNITION1 et al.) modifies this line list and it retains only the lines that are outside the OCR zones. After zoning, the line list does not contain dotted, dashed or double style lines (RLSTYLE). This information only becomes available after the recognition process.

OCR zones may be changed by the recognition process (kRecRecognize), because some post-processing operations have such effects. For example, when non-gridded table detection (kRecSetNongriddedTableDetect) runs during the recognition process.

Any zone can be locally overridden with the functions kRecUpdateZone and kRecUpdateOCRZone. These allow you to change the attributes of a zone in the zone list. Note that the fields ZONE::rectBBox, and ZONE::type cannot be modified by kRecUpdateOCRZone.

Adding zones manually

You can choose to search for zones automatically, and/or create your own zones: user zones. To add simple zones to the zone list manually, use the kRecInsertZone function. To add a rectangle to or subtract a rectangle from an existing user zone, use the functions kRecAddZoneRect or kRecSubZoneRect.

Adding zones from a zone file

The third way of creating zones is to have zones read from a file (called a zone file, or in OmniPage terminology, a zone template file) that contains the attributes of previously saved zones. Zones created this way will also be user zones. An integrating application can save the current user zone definitions to a zone file any time with the kRecSaveZones function. The application can load them from a zone file with the kRecLoadZones function.

NOTE: When a zone file is loaded, any previous zones are removed from the page.

If the application calls the kRecRecognize recognizing function on a page with an empty zone list, the page-layout decomposition function is called automatically.

It is recommended to create homogeneous user zones as much as possible, because they may give better results. It is especially important in the case of Asian languages (either CCJK, Arabic, Thai or Hebrew). WT_AUTO zones can be inhomogeneous.

To get information about any particular zone in the image zone list, invoke the kRecGetZoneInfo and kRecGetOCRZoneInfo functions. These functions are useful to find out more about zones created by auto-zoning.

NOTE: When you update a table-type zone with the kRecUpdateZone function, the cell-detection algorithm will not be activated, resulting in improper table detection within the zone. See the description of creation of table information.

Any changes in user zone list (kRecInsertZone, kRecDeleteZone, kRecDeleteAllZones, kRecAddZoneRect, kRecSubZoneRect, kRecLoadZones, kRecUpdateZone, kRecSetZoneLayout) will make OCR zones invalid; the OCR zone list will be emptied and regenerated.

Usage of Page Descriptor

The page description describes the possible layout elements (text, table, graphics and form) on the page outside of the user zones. These layout elements are found by the page parsing (i.e. zoning) and the recognition processes. The page description has no effect inside the user zones. The LZ_COLUMN / LZ_TABLE / LZ_GRAPHICS flags specify how to find text / table / graphic zones.

A valid page descriptor is either a single LZ_FORM flag (see Form Recognition Module), or a combination (logical OR) of several other bits. These combinable bits are the LZ_COLUMN_column, LZ_TABLE_table and LZ_GRAPHICS_graphics flags, (where column, table and graphics can be NO, ONE and AUTO) and the LZ_FREEFORM flag.

The default page descriptor is 0. This means that page parsing does not create OCR zones outside of the user zones. If no user zones were specified the default value 0 is equivalent to LZ_COLUMN_AUTO | LZ_TABLE_AUTO | LZ_GRAPHICS_AUTO, i.e. automatic zoning is done on the whole page detecting these 3 kinds of zones.

When LZ_FREEFORM is combined (OR'd) with the other flags (or used alone, i.e. combined with the default 0), the normal functioning of the page parsing process is changed to create smaller text zones if necessary, in order to optimize OCR. Even though this flag can be combined with any of the LZ_TABLE flags, tabular structures are not detected as table cells, just as separate zones. It makes this flag most useful when form data is to be extracted from the page without re-constructing the page layout in a formatted output. Combining this flag with LZ_GRAPHICS_NO could make OCR accuracy even better when free forms are to be processed and formatted output (with pictures) is not desired.

LZ_GRAPHICS_NO means that no pictures or graphic areas will be returned as WT_GRAPHIC zones. Page parsing still detects such areas on the page, so no text zones are created on those areas that would return just garbage. Anyway, using LZ_GRAPHICS_NO hints the page parser that any characters found on the page should be part of a text zone. This can improve data extraction accuracy on free forms.

The LZ_TABLE_ONE and LZ_GRAPHICS_ONE flags specify that the whole page is either a single table, or a single picture. If one of them is specified, other zone types are not considered for detection. If both LZ_TABLE_ONE and LZ_GRAPHICS_ONE are specified the zoning works as if only LZ_TABLE_ONE was set. These two ONE flags cannot be combined with LZ_FREEFORM.

The DCM_LEGACY and DCM_FAST mode zoning can handle only the following cases:

Page descriptor is 0: this is the default, works as described above
LZ_COLUMN_AUTO | LZ_TABLE_AUTO | LZ_GRAPHICS_AUTO: only when there is no user zone
LZ_COLUMN_ONE | LZ_TABLE_AUTO | LZ_GRAPHICS_AUTO: only when there is no user zone
LZ_COLUMN_NO | LZ_TABLE_NO | LZ_GRAPHICS_NO: only when there are user zones
Any page descriptor value with a single WT_AUTO user zone, which covers the whole page

any other cases cause an error (API_ERROR_ERR).

Page descriptor can be specified by the function kRecSetPageDescription.

IMPORTANT NOTE: if the page descriptor is set to LZ_FORM, there must not be any zones on the page. Note also that LZ_FORM cannot be combined with other flags. See Form Recognition Module for usage details.

Backwards compatible combination: In versions before 22.0 the default value of this setting was LZ_COLUMN_NO | LZ_TABLE_NO | LZ_GRAPHICS_NO having a non-intuitive meaning in cases when there are no user zones defined; in that case it means LZ_COLUMN_AUTO | LZ_TABLE_AUTO | LZ_GRAPHICS_AUTO. While this LZ_COLUMN_NO | LZ_TABLE_NO | LZ_GRAPHICS_NO combination is defined now only when there are user zones, the old "auto" meaning is still maintained for backwards compatibility. Anyway, new applications must use the 0 value for this purpose.

Note that the change in the default value has an effect on previously invalid combinations. For example the meaning of a single LZ_TABLE_AUTO bit value (that was illegal before version 22.0) is like this:

When there are user zones, it means that automatic table detection is to be done on the areas not covered by the user zone. There is no change in this meaning in versions before and after 22.0.
When there are no user zones, the meaning differs between versions: in old versions it was an illegal value, but it was interpreted as LZ_COLUMN_NO | LZ_TABLE_AUTO | LZ_GRAPHICS_NO. Starting from 22.0 it is a legal value, but the meaning is different: unspecified (0) fields have AUTO meaning, so it is equivalent to LZ_COLUMN_AUTO | LZ_TABLE_AUTO | LZ_GRAPHICS_AUTO!

Other related information

Table detection and handling by Table Recognition Module are also parts of this module.

The working of the Zone Handling Module can also be adjusted with settings in some points.

Detection of filling method of zones

There are multiple ways of doing automatic filling method detection:

When a user defined zone's fm field is FM_AUTO_HAND, then automatic machine print vs. handprint detection is done by the kRecLocateZones function. The detection is done zone by zone, on all FM_AUTO_HAND zones. It is the most accurate filling method detection, available from version 22.0, but it works on user defined zones only and detects these two filling methods only (machine print and handprint).
The older kRecDetectFillingMethod function can be used either on user defined FM_DEFAULT zones, or (after calling kRecLocateZones) on automatic OCR zones. When the FILLINGMETHOD* output parameter of this function is NULL, the function works zone by zone, detecting if the zone contains handprint, machine print, dot-matrix print (9 and 24 pin), or barcode.
Another use of this function is when the output argument is not NULL: in this case a single, page-specific filling method value is returned only, that tells what the most general filling method of the page is, considering the specified zones only.
The final case is when kRecDetectFillingMethod is not called and there are FM_DEFAULT zones. Normally in this case no filling method detection is done; the zones inherit the value of the default filling method (kRecSetDefaultFillingMethod). But if this default value itself is also FM_DEFAULT, that is a special case: then filling method detection (kRecDetectFillingMethod) is called automatically at the beginning of the recognition process.

If filling method detection cannot determine a type in a given zone, it leaves FM_DEFAULT in the field fm of the zone.

During recognition if both the default filling method and the field ZONE::fm are FM_DEFAULT, the engine supposes FM_OMNIFONT filling method for such zones.

Vertical text support

For Western languages, including Greek and languages using the Cyrillic alphabet, automatic detection of left or right rotated vertical text is available, including detection inside table cells.

Multi-line table cell content can be detected for all input file types, providing the cell borders are visible.
For PDF files containing a text layer auto-detection will find both single- and multi-line vertical texts anywhere on the page.
However for other input file types, beyond table cell handling, only single-line caption-style texts near diagrams or pictures or at the border of the page can be auto-detected.

Auto-detected vertical text zones outside tables take the flags WT_LEFTTEXT or WT_RIGHTTEXT while table cells detected as containing vertical text now include this in the CELL_INFO data.

This auto-detection runs on images with no inserted user zones, or on page portions designated for auto-zoning. Text direction can be forced by inserting user zones into page images containing Western or Cyrillic languages, using the following flags: normal (WT_FLOW), left rotated vertical (WT_LEFTTEXT) or right rotated vertical (WT_RIGHTTEXT). Vertical text user zones must be rectangular; they can be placed anywhere on the page and can cover multi-line texts. To force vertical text handling in a table cell, the required flag should be set in the new text type field inside CELL_INFO.

Automatic left and right text detection can be disabled by switching the Kernel.Decomp.FindRotatedText setting off. Switch this setting off if the processed document does not contain vertical text, because the vertical text detection (and recognition) may increase processing time.

Vertical text can also be auto-detected for CCJK languages. Alternatively, it can be explicitly set by inserting zones with the flags WT_FLOW for horizontal left-to-right text or WT_VERTTEXT for vertical text with top-to-bottom character flow and right-to-left line flow. As for Western languages, CCJK vertical text zones must be rectangular. If an irregular zone is changed to vertical text content, its shape snaps to a bounding rectangle, removing the irregularity. It is not possible to pass text direction information for table cells to the Asian OCR module – it will perform auto-detection.

In the recognition results, the LETTER structure makeup field contains two additional bits to store the text direction. See also which output converters and formatting levels of the RecAPIPlus support the different types of vertical texts.

Macro Definition Documentation

◆ LZ_FORM

#define LZ_FORM 0x01000000

The page contains an unfilled form. Do not create any user zones if you use LZ_FORM! See Form Recognition Module as well. LZ_FORM works with western languages only. Non-supported languages return with the error code NOT_IMPL_ERR.

Typedef Documentation

◆ ZONE

typedef struct _ZONE ZONE

ZONE structure.

The ZONE structure contains all the necessary information for describing a zone. A zone is a rectangular or pizza-box shaped area on an image (Only OMR zones can be non-pizza-box shaped; and table zones and vertical text zones must be rectangular), but ZONE contains only its bounding box. It contains a feature of interest to the user. The image data covered by each zone is handled and processed (typically recognized) separately. A very first classification of the zones is their type: whether a zone's area is to be recognized or it should be treated as graphic. Zones containing textual information can be either flowed type or table type zones. This basic classification of zones helps the Engine to handle the textual information correctly. Zones are always associated with a FILLINGMETHOD filling method and one of the available RECOGNITIONMODULE recognition modules. These determine what kind of recognition algorithm is applied to the zone during its processing. For possible RM-FM pairs see Filling method - Recognition module combinations.

Note: Zones in a page are built into a sequential list, called the zone list. Each zone receives an internal index number, reflecting its position in this list. In some cases this zone order even determines the zone content order in the final output document. The index of the first zone in the zone list is zero (0). It is recommended to create homogeneous user zones as much as possible, because they may give better results. It is especially important in the case of Asian languages. WT_AUTO zones can be inhomogeneous.

Enumeration Type Documentation

◆ CREATEHANDWRINGPAGEMODE

enum CREATEHANDWRINGPAGEMODE

kRecCreateHandwritingPage modes

These types can be used to specify the processing mode of kRecCreateHandwritingPage.

Enumerator
CHP_FIND_HANDWRITING	hPage will not change. *phPageOut will contain the handwriting only and it will contain the handwriting zones.
CHP_SEPARATE_HANDWRITING	Handwriting will be removed from hPage. *phPageOut will contain only the handwriting, and it will contain the handwriting zones.

◆ FILLINGMETHOD

enum FILLINGMETHOD

Filling methods.

This enumerates the possible content types of the zones from the Engine's perspective. Each zone must have one of the filling methods listed here. It can be done by specifying the fm field of the zones defined on the image.

Note: Each zone needs to be associated with a RECOGNITIONMODULE (RM) and a FILLINGMETHOD (FM). This is needed because some RMs support more than one FM, and some FMs are accepted by more than one RM. It is important to ensure that the RM-FM pair is suitable. For possible RM-FM pairs see Filling method - Recognition module combinations.

Enumerator
FM_DEFAULT	The default zone filling method. The actual zone filling method for all zones of this type will be inquired just before recognition, according to the setting previously specified with a separate call to the kRecSetDefaultFillingMethod function.
FM_OMNIFONT	The omnifont zone filling method. It denotes a machine printed text with any typeface not highly stylized. All platforms.
FM_DRAFTDOT9	The 9-pin draft dot-matrix zone filling method. It denotes a 9-pin draft dot-matrix printout. Supported on: Windows.
FM_BARCODE	The barcode zone filling method. It denotes a zone with 1D or 2D (one- or two-dimensional) barcodes. All platforms.
FM_OMR	The optical mark zone filling method. It denotes a zone with one or more checkboxes that are judged to be marked or unmarked. All platforms.
FM_HANDPRINT	The hand-written zone filling method. It denotes hand-written text within the zone. Supported on: Windows, Linux, and MacOS x64.
FM_BRAILLE	This filling method is NOT available.
FM_DRAFTDOT24	The 24-pin draft dot-matrix zone filling method. It denotes a 24-pin draft dot-matrix printout. All platforms.
FM_OCRA	The OCR-A zone filling method. All platforms.
FM_OCRB	The OCR-B zone filling method. All platforms.
FM_MICR	The magnetic ink character filling method. Supported on: Windows, Linux, Embedded Linux, and MacOS x64.
FM_BARCODE2D	The 2D barcode zone filling method.
FM_DOTDIGIT	The dot-digit zone filling method. Supported on: Windows.
FM_DASHDIGIT	The dash-digit zone filling method. Supported on: Windows.
FM_RESERVED_2	Internal use only.
FM_CMC7	The CMC7 font zone filling method. Supported on: Windows, Linux, Embedded Linux, and MacOS x64.
FM_NO_OCR	No recognition will be attempted.
FM_AUTO_HAND	Automatic detection: the zone is either machineprint or handprint
FM_AUTO_BAR	Automatic detection: the zone is either machineprint or barcode
FM_MRZ	The MRZ zone filling method. It denotes a 2 or 3-line Machine Readable Zone area on ID cards. All platforms.
FM_SIZE	Number of zone filling methods.

◆ IMG_DECOMP

enum IMG_DECOMP

Page parse method.

This enum lists the possible values of the Page parser algorithm settings of the Engine. This setting makes it possible to specify one of the three different page parser algorithms for Latin-alphabet languages, or one of the two different algorithms for CCJK and Arabic languages. In the latter case DCM_LEGACY and DCM_FAST are the same. In addition, this setting, together with the trade-off setting, affects the accuracy of embedded English text recognition of the Arabic Recognition module.

Enumerator
DCM_AUTO	Automatic. The Engine determines at runtime which algorithm will be applied. This depends on the trade-off setting: DCM_LEGACY algorithm is applied when trade-off is TO_FAST, otherwise DCM_STANDARD is applied (Default.)
DCM_LEGACY	Simple page decomposition. This is faster but less accurate than Standard.
DCM_STANDARD	The most accurate page parsing module.
DCM_FAST	For non-Asian languages this algorithm can be treated as a simplified variant of `DCM_LEGACY`: this algorithm does not try to find tables on the page. It is the fastest (but least accurate) parsing algorithm available. Use it only for simple page parsing tasks! For CCJK and Arabic languages this is the same as `DCM_LEGACY`.

◆ LOCATEHANDWRITINGMODE

enum LOCATEHANDWRITINGMODE

kRecLocateHandwriting modes

These types can be used to specify the processing mode of kRecLocateHandwriting.

Enumerator
LH_RETAIN_HANDWRITING	Find handwriting and remove anything else from hPage. Return the handwriting zones.
LH_REMOVE_HANDWRITING	Remove handwriting from hPage.

◆ RECOGNITIONMODULE

enum RECOGNITIONMODULE

Recognition modules (Engines)

This enumerates the different recognition modules of the Engine available to the integrating application. All zones must have an assigned recognition module in their rm fields before processing.

Note: Only those recognition modules that have been supplied, licensed and successfully installed in the Engine configuration will be available.; The RM_AUTO setting can be specifically placed in the zone's rm field. It is also generated by the kRecLocateZones (auto-zoning) function and, under specific circumstances, by the function kRecDetectFillingMethod.; Each zone needs to be associated with a RECOGNITIONMODULE (RM) and a FILLINGMETHOD (FM). This is needed because some RMs support more than one FM, and some FMs are accepted by more than one RM. It is important to ensure that the RM-FM pair is suitable. For possible RM-FM pairs see Filling method - Recognition module combinations.

Enumerator
RM_AUTO	Specifies that the Engine will automatically attempt to select the most suitable recognition module for the zone. This will be determined just before recognition, according to the zone's filling method and, if necessary, other settings (most typically the Character Set).
RM_OMNIFONT_MTX	Specifies the M/TEXT omnifont recognition module. Supported on: Windows.
RM_OMNIFONT_MOR	Specifies the multi-lingual MOR omnifont recognition module. Supported on: Windows, Linux, and MacOS x64.
RM_DOT	Specifies the DOT 9-pin draft dot-matrix recognition module. Supported on: Windows.
RM_BAR	Specifies the BAR barcode recognition module. All platforms.
RM_OMR	Specifies the OMR optical mark recognition module. All platforms.
RM_HNR	Specifies the HNR handprinted numeral recognition module. Supported on: Windows.
RM_RER	Specifies the third-party RER reRecognition Handprint recognition module. Supported on: Windows, Linux, and MacOS x64.
RM_BRA	This recognition module is NOT supported.
RM_MAT	Specifies the MAT matrix matching recognition module. Supported on: Windows.
RM_RESERVED_P	Internal use only. Must NOT be selected.
RM_OMNIFONT_PLUS2W	2-way voting engine. All platforms.
RM_OMNIFONT_FRX	FRX engine. All platforms.
RM_OMNIFONT_PLUS3W	3-way voting engine. Supported on: Windows, Linux, MacOS.
RM_ASIAN	Asian engine from Tsinghua. All platforms.
RM_RESERVED_M	Internal use only. Must NOT be selected.
RM_RESERVED_A	Internal use only. Must NOT be selected.
RM_ARABIC	Arabic engine from Tsinghua. All platforms.
RM_SIZE	Number of available recognition modules.

◆ ZONE_BORDER_TYPE

enum ZONE_BORDER_TYPE

Zone border types.

Types of zone border of printed handwritten text. See kRecSetZoneBorder.

Enumerator
ZBT_PURE_TEXT	Pure text without border.
ZBT_FRAMED_PURE_TEXT	The text is enclosed by a frame.
ZBT_SHORT_COMB	The text is in a comb.
ZBT_FRAMED_SHORT_COMB	The text is in a comb which is enclosed by a frame.
ZBT_INDIVIDUAL_BOXES	The characters are in separate boxes.
ZBT_FRAMED_FULL_COMB	The text is enclosed by a frame, the characters are separated by vertical lines.
ZBT_UNDERLINED_TEXT	The text is underlined.
ZBT_GRAY_BOXES	The characters are in white fields on gray background.
ZBT_SIZE	Number of border types.

◆ ZONETYPE

enum ZONETYPE

Basic zone types.

Enumerator
WT_FLOW	Flowed text. This zone type means that the zone contains textual information arranged horizontally without a table type structure inside. Inside a user zone of this type kRecLocateZones creates one OCR zone of the same type. It can be in OCR zones and user zones. It can also be used for horizontally appearing CCJK characters.
WT_TABLE	Table type zone. This type means that the zone contains a table, i.e. rows and columns, with or without a grid. Such zones will be handled differently from flowed text type zones. Inside a user zone of this type kRecLocateZones creates one OCR zone of the same type. The Engine will try to reconstruct as much of the original table text layout of the zone as the final output document format supports.
WT_GRAPHIC	Graphic type zone. This type of zone contains graphics, i.e. this zone will not be recognized at all and all other recognition related settings will be ignored. The only reason to have such a zone is to save or export the image inside it. Inside a user zone of this type kRecLocateZones creates one OCR zone of the same type.
WT_AUTO	Inside a user zone of this type kRecLocateZones performs a parsing algorithm and it may create several OCR zones of any types except `WT_AUTO`, `WT_IGNORE` and `WT_FORM`. This type can only be in user zones.
WT_IGNORE	Ignore zone. kRecLocateZones does not create OCR zones inside a user zone of this type.
WT_FORM	Form zone. Logical Form Recognition will run within this zone. It indicates an unfilled form and it should be set in the user zone before running kRecLocateZones. kRecLocateZones creates one OCR zone of the same type, the created OCR zone contains a description of the form objects. See also Form Recognition Module.
WT_VERTTEXT	Vertical text. For CCJK characters only.
WT_LEFTTEXT	Left rotated text. For Latin, Greek and Cyrillic characters only.
WT_RIGHTTEXT	Right rotated text. For Latin, Greek and Cyrillic characters only.

Function Documentation

◆ kRecAddZoneRect()

RECERR RECAPIKRN kRecAddZoneRect	(	HPAGE	hPage,
		IMAGEINDEX	iiImg,
		const RECT *	pRect,
		int	nZone )

Adding a rectangle to a user zone.

This function adds a new rectangle to an existing user zone. It creates their union. Because the new rectangle can overlap previous rectangles the function recalculates the rectangle list of the zone. The resulting union must be pizza-box except in the case of OMR zones. Of course, table zones and vertical text zones cannot receive new rectangles.

Parameters

[in]	hPage	Handle of the page.
[in]	iiImg	The index of the image whose coordinate system you have used in defining the rectangle to be added. It is recommended to use II_CURRENT coordinates if possible.
[in]	pRect	The rectangle to be added to the given user zone.
[in]	nZone	The index of the user zone the new rectangle is added to.

Return values

RECERR

Note: The insertion of a new irregular zone can be performed only by rectangles. The first rectangle must be inserted by kRecInsertZone. The additional rectangles must be added by kRecAddZoneRect or removed by kRecSubZoneRect. The rectangle list of the given zone is automatically updated (the overlapping rectangles are recalculated; when rectangles are subtracted, the remaining (maybe non-rectangular) area is broken into rectangular areas, etc.), thus the ZONE passed to the function kRecInsertZone must have only the bounding box of the irregular zone’s first rectangle (it must not be the bounding box of the whole zone).; The CSDK does not support overlapping user zones, thus newly added rectangles must be disjoint from other zones (of course, except the nZone-th one).; Note that the index of the first zone in the zone list is zero (0).; Any change in the user zone list provokes the deletion of the OCR zone list.; The specification of this function in C# is:
RECERR kRecAddZoneRect(IntPtr hPage, IMAGEINDEX iiImg, RECT pRect, int nZone);

RECERR
RECERR
Error codes.
Definition RECERR_doc.h:19

IMAGEINDEX
IMAGEINDEX
Index of each image type in HPAGE.
Definition KernelApi.h:991

kRecAddZoneRect
RECERR RECAPIKRN kRecAddZoneRect(HPAGE hPage, IMAGEINDEX iiImg, const RECT *pRect, int nZone)
Adding a rectangle to a user zone.; The specification of this function in Java is:
int kRecAddZoneRect(HPAGE hPage, IMAGEINDEX iiImg, RECT pRect, int nZone)

HPAGE
struct RECPAGESTRUCT * HPAGE
Handle of a page in memory.
Definition KernelApi.h:289; The specification of this function in Python is:
def kRecAddZoneRect(hPage: "HPAGE", iiImg: int, pRect: "RECT", nZone: int) -> int

◆ kRecCopyOCRZones()

RECERR RECAPIKRN kRecCopyOCRZones ( HPAGE hPage )

Copying the OCR zone list to a user zone list.

This function copies the OCR zones in the place of user zones. It leaves the OCR zones intact, but deletes the former user zones. It can be used to delete/insert/change user zones based on the OCR zones detected by a previous kRecLocateZones.

Parameters

[in] hPage Handle of the page.

Return values

RECERR

Note: The specification of this function in C# is:
RECERR kRecCopyOCRZones(IntPtr hPage);

kRecCopyOCRZones
RECERR RECAPIKRN kRecCopyOCRZones(HPAGE hPage)
Copying the OCR zone list to a user zone list.; The specification of this function in Java is:
int kRecCopyOCRZones(HPAGE hPage); The specification of this function in Python is:
def kRecCopyOCRZones(hPage: "HPAGE") -> int

◆ kRecCreateHandwritingPage()

RECERR RECAPIKRN kRecCreateHandwritingPage	(	int	sid,
		HPAGE	hPage,
		HPAGE *	phPageOut,
		CREATEHANDWRINGPAGEMODE	mode,
		LPCRECT	pRect )

Find/separate handwriting on the given image.

This function finds the handwriting on the page and creates a new HPAGE that will contain only the handwriting. The new HPAGE will contain the handwriting zones. Depending on the mode parameter, the original HPAGE will either remain unchanged or have the handwriting removed from it. This is useful when these areas will be recognized by RM_RER, RM_HNR or another handwritten text recognizer. See also kRecLocateHandwriting.

Parameters

[in]	sid	Settings Collection ID.
[in]	hPage	Handle of the page.
[out]	phPageOut	Address of a variable to store the handle of the created page that contains only the handwriting. Must be freed with kRecFreeImg when no longer needed.
[in]	mode	Specifies whether to change the original hPage or not.
[in]	pRect	Pointer to the rectangular area to be detected (II_CURRENT coordinate system is used). If it is NULL the entire image is processed.

Return values

RECERR

Note: See the description about processing handwritten text.; The specification of this function in C# is:
RECERR kRecCreateHandwritingPage(int sid, IntPtr hPage, out IntPtr phPage,

CREATEHANDWRINGPAGEMODE mode, RECT UserRect);

CREATEHANDWRINGPAGEMODE
CREATEHANDWRINGPAGEMODE
kRecCreateHandwritingPage modes
Definition KernelApi.h:3311

kRecCreateHandwritingPage
RECERR RECAPIKRN kRecCreateHandwritingPage(int sid, HPAGE hPage, HPAGE *phPageOut, CREATEHANDWRINGPAGEMODE mode, LPCRECT pRect)
Find/separate handwriting on the given image.; The specification of this function in C# Objects is:
Page.CreateHandwritingPage(out Page page, CREATEHANDWRINGPAGEMODE mode, RECT UserRect);; The specification of this function in Java is:
int kRecCreateHandwritingPage(int sid, HPAGE hPage, HPAGE phPageOut, CREATEHANDWRINGPAGEMODE mode, RECT pRect); The specification of this function in Python is:
def kRecCreateHandwritingPage(sid: int, hPage: "HPAGE", mode: "CREATEHANDWRINGPAGEMODE", pRect: "RECT") -> Tuple[int, "HPAGE"]

◆ kRecDeleteAllZones()

RECERR RECAPIKRN kRecDeleteAllZones ( HPAGE hPage )

Deleting all user zones.

This function deletes all zones both in the user and the OCR zone list for the page.

Parameters

[in] hPage Handle of the page.

Return values

RECERR

Note: The specification of this function in C# is:
RECERR kRecDeleteAllZones(IntPtr hPage);

kRecDeleteAllZones
RECERR RECAPIKRN kRecDeleteAllZones(HPAGE hPage)
Deleting all user zones.; The specification of this function in Java is:
int kRecDeleteAllZones(HPAGE hPage); The specification of this function in Python is:
def kRecDeleteAllZones(hPage: "HPAGE") -> int

◆ kRecDeleteZone()

RECERR RECAPIKRN kRecDeleteZone	(	HPAGE	hPage,
		int	nZone )

Deleting a user zone.

This function deletes a zone from the user zone list of the page.

Parameters

[in]	hPage	The handle of the page.
[in]	nZone	Index of the user zone to be deleted.

Return values

RECERR

Note: The index of the first zone in the zone list is zero (0).; Any change in the user zone list provokes the deletion of the OCR zone list.; The specification of this function in C# is:
RECERR kRecDeleteZone(IntPtr hPage, int nZone);

kRecDeleteZone
RECERR RECAPIKRN kRecDeleteZone(HPAGE hPage, int nZone)
Deleting a user zone.; The specification of this function in Java is:
int kRecDeleteZone(HPAGE hPage, int nZone); The specification of this function in Python is:
def kRecDeleteZone(hPage: "HPAGE", nZone: int) -> int

◆ kRecGetDecompMethod()

RECERR RECAPIKRN kRecGetDecompMethod	(	int	sid,
		IMG_DECOMP *	pAlgorithm )

Getting the decomp method.

This function inquires the current Page parser algorithm setting of the Engine.

Parameters

[in]	sid	Settings Collection ID.
[out]	pAlgorithm	The current page parser algorithm.

Return values

RECERR

Note: This function gets the value of the setting Kernel.Decomp.Method. This setting can be changed by kRecSetDecompMethod.; The specification of this function in C# is:
RECERR kRecGetDecompMethod(int sid, out IMG_DECOMP decompAlg);

IMG_DECOMP
IMG_DECOMP
Page parse method.
Definition KernelApi.h:2363

kRecGetDecompMethod
RECERR RECAPIKRN kRecGetDecompMethod(int sid, IMG_DECOMP *pAlgorithm)
Getting the decomp method.; The specification of this function in Java is:
int kRecGetDecompMethod(int sid, IMG_DECOMP[] pAlgorithm); The specification of this function in Python is:
def kRecGetDecompMethod(sid: int) -> Tuple[int, int]

◆ kRecGetForceSingleColumn()

RECERR RECAPIKRN kRecGetForceSingleColumn	(	int	sid,
		INTBOOL *	pbForceSingle )

Getting the setting of Force Single Column mode.

This function inquires the current setting of the Force Single Column mode.

Parameters

[in]	sid	Settings Collection ID.
[out]	pbForceSingle	Address of a Boolean variable to hold the current Force Single Column mode setting.

Return values

RECERR

Note: This function gets the value of the setting Kernel.Decomp.ForceSingleColumn. This setting can be changed by kRecSetForceSingleColumn.; The specification of this function in C# is:
RECERR kRecGetForceSingleColumn(int sid, out bool bEnable);

kRecGetForceSingleColumn
RECERR RECAPIKRN kRecGetForceSingleColumn(int sid, INTBOOL *pbForceSingle)
Getting the setting of Force Single Column mode.; The specification of this function in Java is:
int kRecGetForceSingleColumn(int sid, int[] pbForceSingle); The specification of this function in Python is:
def kRecGetForceSingleColumn(sid: int) -> Tuple[int, bool]

◆ kRecGetNongriddedTableDetect()

RECERR RECAPIKRN kRecGetNongriddedTableDetect	(	int	sid,
		INTBOOL *	bEnable )

Getting the non-gridded table detection setting.

This function inquires whether the non-gridded table detection feature of the Engine is enabled.

Parameters

[in]	sid	Settings Collection ID.
[out]	bEnable	The value of the current non-gridded table detection setting.

Return values

RECERR

Note: This function gets the value of the setting Kernel.Decomp.EnableNonGrid. This setting can be changed by kRecSetNongriddedTableDetect.; The specification of this function in C# is:
RECERR kRecGetNongriddedTableDetect(int sid, out bool bEnable);

kRecGetNongriddedTableDetect
RECERR RECAPIKRN kRecGetNongriddedTableDetect(int sid, INTBOOL *bEnable)
Getting the non-gridded table detection setting.; The specification of this function in Java is:
int kRecGetNongriddedTableDetect(int sid, int[] bEnable); The specification of this function in Python is:
def kRecGetNongriddedTableDetect(sid: int) -> Tuple[int, bool]

◆ kRecGetOCRZoneCount()

RECERR RECAPIKRN kRecGetOCRZoneCount	(	HPAGE	hPage,
		int *	pnOCRZones )

Getting the OCR zone count.

This function gets the number of zones in the OCR zone list of the page.

Parameters

[in]	hPage	Handle of the page.
[out]	pnOCRZones	Address of an integer variable to get the number of zones.

Return values

RECERR

Note: The specification of this function in C# is:
RECERR kRecGetOCRZoneCount(IntPtr hPage, out int ZoneCount);

kRecGetOCRZoneCount
RECERR RECAPIKRN kRecGetOCRZoneCount(HPAGE hPage, int *pnOCRZones)
Getting the OCR zone count.; The specification of this function in Java is:
int kRecGetOCRZoneCount(HPAGE hPage, int[] pnOCRZones); The specification of this function in Python is:
def kRecGetOCRZoneCount(hPage: "HPAGE") -> Tuple[int, int]

◆ kRecGetOCRZoneData()

RECERR RECAPIKRN kRecGetOCRZoneData	(	HPAGE	hPage,
		IMAGEINDEX	iiImg,
		LPZONEDATA	pOCRZoneData,
		int	nOCRZone )

Getting additional information about OCR zones.

This function can be used for getting additional information about any OCR zone in the OCR zone list of the page.

Parameters

[in]	hPage	Handle of the page.
[in]	iiImg	Index of the image in the page whose coordinate system is to be used to report the zone's boundary box.
[out]	pOCRZoneData	Pointer to a variable for storing the requested zone-data information.
[in]	nOCRZone	Index of the zone in the zone list, from which the information is requested.

Return values

RECERR

Note: The index of the first zone in the zone list is zero (0).; The specification of this function in C# is:
RECERR kRecGetOCRZoneData(IntPtr hPage, IMAGEINDEX iiImg, out ZONEDATA pOCRZoneData, int nOCRZone);

kRecGetOCRZoneData
RECERR RECAPIKRN kRecGetOCRZoneData(HPAGE hPage, IMAGEINDEX iiImg, LPZONEDATA pOCRZoneData, int nOCRZone)
Getting additional information about OCR zones.

ZONEDATA
ZONEDATA structure.
Definition KernelApi.h:1708; The specification of this function in Java is:
int kRecGetOCRZoneData(HPAGE hPage, IMAGEINDEX iiImg, ZONEDATA pOCRZoneData, int nOCRZone); The specification of this function in Python is:
def kRecGetOCRZoneData(hPage: "HPAGE", iiImg: int, nOCRZone: int) -> Tuple[int, "ZONEDATA"]

◆ kRecGetOCRZoneInfo()

RECERR RECAPIKRN kRecGetOCRZoneInfo	(	HPAGE	hPage,
		IMAGEINDEX	iiImg,
		LPZONE	pOCRZone,
		int	nOCRZone )

Getting OCR zone information.

This function can be used for getting information about any zone in the OCR zone list of the page.

Parameters

[in]	hPage	Handle of the page.
[in]	iiImg	Index of the image in the page whose coordinate system is used to report the zone's boundary box.
[out]	pOCRZone	Pointer to a variable for storing the requested zone information.
[in]	nOCRZone	Index of the zone in the zone list, from which the information is requested.

Return values

RECERR

Note: The index of the first zone in the zone list is zero (0).; The specification of this function in C# is:
RECERR kRecGetOCRZoneInfo(IntPtr hPage, IMAGEINDEX iiImage, out ZONE pZone, int nZone);

kRecGetOCRZoneInfo
RECERR RECAPIKRN kRecGetOCRZoneInfo(HPAGE hPage, IMAGEINDEX iiImg, LPZONE pOCRZone, int nOCRZone)
Getting OCR zone information.

_ZONE
ZONE structure.
Definition KernelApi.h:1675; The specification of this function in Java is:
int kRecGetOCRZoneInfo(HPAGE hPage, IMAGEINDEX iiImg, ZONE pOCRZone, int nOCRZone); The specification of this function in Python is:
def kRecGetOCRZoneInfo(hPage: "HPAGE", iiImg: int, nOCRZone: int) -> Tuple[int, "ZONE"]

◆ kRecGetOCRZoneLayout()

RECERR RECAPIKRN kRecGetOCRZoneLayout	(	HPAGE	hPage,
		IMAGEINDEX	iiImg,
		LPRECT *	ppRects,
		int *	pnRects,
		int	nZone )

Getting OCR zone shape information.

This function can be used for getting information about the shape of any zone in the OCR zone list of the hPage page. For more information about the possible shape of the zones see the definition of pizza-box.

Parameters

[in]	hPage	Handle of the page.
[in]	iiImg	Index of the image in the page whose coordinate system is used to report the shape information.
[out]	ppRects	Pointer to an array of RECTs for storing the requested shape information.
[out]	pnRects	Pointer to a number variable for storing the number of rectangles in the `ppRects` array.
[in]	nZone	Index of the zone in the zone list, from which the information is requested.

Return values

RECERR

Note: The index of the first zone in the zone list is zero (0).; Since this function creates a new memory object containing the shape RECTs, after evaluating the result, the application should call the kRecFree function to free this memory area.; The specification of this function in C# is:
RECERR kRecGetOCRZoneLayout(IntPtr hPage, IMAGEINDEX iiImg, out RECT[] ppRects, int nZone);

kRecGetOCRZoneLayout
RECERR RECAPIKRN kRecGetOCRZoneLayout(HPAGE hPage, IMAGEINDEX iiImg, LPRECT *ppRects, int *pnRects, int nZone)
Getting OCR zone shape information.; The specification of this function in Java is:
int kRecGetOCRZoneLayout(HPAGE hPage, IMAGEINDEX iiImg, RectArray ppRects, int nZone); The specification of this function in Python is:
def kRecGetOCRZoneLayout(hPage: "HPAGE", iiImg: int, nZone: int) -> Tuple[int, "RectArray"]

◆ kRecGetOCRZoneNodeArray()

RECERR RECAPIKRN kRecGetOCRZoneNodeArray	(	HPAGE	hPage,
		IMAGEINDEX	iiImg,
		LPPOINT *	ppPoints,
		int *	pnNodes,
		int	iZone )

Getting the polygon of the OCR zone.

This function retrieves the polygon made up of the OCR zone's vertices. This can be useful for an application with a GUI for drawing irregular zones.

Parameters

[in]	hPage	The handle of the page.
[in]	iiImg	Index of the image in the page whose coordinate system is used to report the points.
[out]	ppPoints	The pointer of the array of polygon points. This array is allocated by the function and can be freed via calling the function kRecFree.
[out]	pnNodes	The pointer of an integer retrieving the number of polygon vertices.
[in]	iZone	The index of the OCR zone in question.

Return values

RECERR

Note: The index of the first zone in the zone list is zero (0).; You cannot call this function on OMR zones (see FM_OMR, RM_OMR), because they may have non-pizzabox shapes.; If iiImg is II_ORIGINAL the polygon may have slanting edges.; Since this function creates a new memory object containing the nodes, the application should call the kRecFree function to free this memory area after evaluating the result.; The specification of this function in C# is:
RECERR kRecGetOCRZoneNodeArray(IntPtr hPage, IMAGEINDEX iiImg, out POINT[] ppPoints, int nZone);

kRecGetOCRZoneNodeArray
RECERR RECAPIKRN kRecGetOCRZoneNodeArray(HPAGE hPage, IMAGEINDEX iiImg, LPPOINT *ppPoints, int *pnNodes, int iZone)
Getting the polygon of the OCR zone.; The specification of this function in Java is:
int kRecGetOCRZoneNodeArray(HPAGE hPage, IMAGEINDEX iiImg, PointArray ppPoints, int iZone); The specification of this function in Python is:
def kRecGetOCRZoneNodeArray(hPage: "HPAGE", iiImg: int, iZone: int) -> Tuple[int, "PointArray"]

◆ kRecGetPageDescription()

RECERR RECAPIKRN kRecGetPageDescription	(	int	sid,
		DWORD *	pPageDesc )

Getting page description data.

This function gets the current page description data.

Parameters

[in]	sid	Settings Collection ID.
[out]	pPageDesc	The actual Page Descriptor.

Return values

RECERR

Note: This function gets the value of the setting Kernel.Decomp.PageDesc. This setting can be changed by kRecSetPageDescription.; The specification of this function in C# is:
RECERR kRecGetPageDescription(int sid, out PAGEDESCRIPTION pPageDesc);

kRecGetPageDescription
RECERR RECAPIKRN kRecGetPageDescription(int sid, DWORD *pPageDesc)
Getting page description data.; The specification of this function in Java is:
int kRecGetPageDescription(int sid, long[] pPageDesc); The specification of this function in Python is:
def kRecGetPageDescription(sid: int) -> Tuple[int, int]

◆ kRecGetZoneBorder()

RECERR RECAPIKRN kRecGetZoneBorder	(	HPAGE	hPage,
		int	nZone,
		ZONE_BORDER_TYPE *	pZoneBorderType,
		int *	pCellCount )

Getting the border type of zone.

This function gets the border type of a zone.

Parameters

[in]	hPage	Handle of the page.
[in]	nZone	Index of the user zone in question.
[out]	pZoneBorderType	Address of a variable to get the border type, see: ZONE_BORDER_TYPE.
[out]	pCellCount	Address of a variable to get the number of comb cells or boxes.

Return values

RECERR

Note: The index of the first zone in the zone list is zero (0).; The specification of this function in C# is:
RECERR kRecGetZoneBorder(IntPtr hPage, nZone, out ZoneBorder, out int CellCount);

kRecGetZoneBorder
RECERR RECAPIKRN kRecGetZoneBorder(HPAGE hPage, int nZone, ZONE_BORDER_TYPE *pZoneBorderType, int *pCellCount)
Getting the border type of zone.; The specification of this function in Java is:
int kRecGetZoneBorder(HPAGE hPage, nZone, ZoneBorder, int[] pCellCount); The specification of this function in Python is:
def kRecGetZoneBorder(hPage: "HPAGE", nZone: int) -> Tuple[int, int, int]

◆ kRecGetZoneCount()

RECERR RECAPIKRN kRecGetZoneCount	(	HPAGE	hPage,
		int *	pnZones )

Getting the user zone count.

This function gets the number of zones in the user zone list for the page.

Parameters

[in]	hPage	Handle of the page.
[out]	pnZones	Address of an integer variable to get the number of zones.

Return values

RECERR

Note: The specification of this function in C# is:
RECERR kRecGetZoneCount(IntPtr hPage, out int pnZones);

kRecGetZoneCount
RECERR RECAPIKRN kRecGetZoneCount(HPAGE hPage, int *pnZones)
Getting the user zone count.; The specification of this function in Java is:
int kRecGetZoneCount(HPAGE hPage, int[] pnZones); The specification of this function in Python is:
def kRecGetZoneCount(hPage: "HPAGE") -> Tuple[int, int]

◆ kRecGetZoneInfo()

RECERR RECAPIKRN kRecGetZoneInfo	(	HPAGE	hPage,
		IMAGEINDEX	iiImg,
		LPZONE	pZone,
		int	nZone )

Getting user zone information.

This function can be used for getting information about any zone in the user zone list of the page.

Parameters

[in]	hPage	Handle of the page.
[in]	iiImg	Index of the image in the page, whose coordinate system is used to report the zone's boundary box.
[out]	pZone	Pointer to a variable for storing the requested zone information.
[in]	nZone	Index of the zone in the zone list, from which the information is requested.

Return values

RECERR

Note: Note that the index of the first zone in the zone list is zero (0).; The specification of this function in C# is:
RECERR kRecGetZoneInfo(IntPtr hPage, IMAGEINDEX iiImg, out ZONE pZone, int nZone);

kRecGetZoneInfo
RECERR RECAPIKRN kRecGetZoneInfo(HPAGE hPage, IMAGEINDEX iiImg, LPZONE pZone, int nZone)
Getting user zone information.; The specification of this function in Java is:
int kRecGetZoneInfo(HPAGE hPage, IMAGEINDEX iiImg, ZONE pZone, int nZone); The specification of this function in Python is:
def kRecGetZoneInfo(hPage: "HPAGE", iiImg: int, nZone: int) -> Tuple[int, "ZONE"]

◆ kRecGetZoneLayout()

RECERR RECAPIKRN kRecGetZoneLayout	(	HPAGE	hPage,
		IMAGEINDEX	iiImg,
		LPRECT *	ppRects,
		int *	pnRects,
		int	iZone )

Getting user zone shape information.

This function can be used for getting information about the shape of any zone in the user zone list of the hPage page. For more information about the possible zone shapes see the definition of pizza-box.

Parameters

[in]	hPage	Handle of the page.
[in]	iiImg	Index of the image in the page, whose coordinate system is used to report the requested zone shape.
[out]	ppRects	Pointer to an array of RECTs for storing the requested shape information.
[out]	pnRects	Pointer to a number variable for storing the number of rectangles in the `ppRects` array.
[in]	iZone	Index of the zone in the zone list, from which the information is requested.

Return values

RECERR

Note: Note that the index of the first zone in the zone list is zero (0).; Since this function creates a new memory object containing the shape RECTs, after evaluating the result, the application should call the kRecFree function to free this memory area.; The specification of this function in C# is:
RECERR kRecGetZoneLayout(IntPtr hPage, IMAGEINDEX iiImg, out RECT[] ppRects, int nZone);

kRecGetZoneLayout
RECERR RECAPIKRN kRecGetZoneLayout(HPAGE hPage, IMAGEINDEX iiImg, LPRECT *ppRects, int *pnRects, int iZone)
Getting user zone shape information.; The specification of this function in Java is:
int kRecGetZoneLayout(HPAGE hPage, IMAGEINDEX iiImg, RectArray ppRects, int iZone); The specification of this function in Python is:
def kRecGetZoneLayout(hPage: "HPAGE", iiImg: int, iZone: int) -> Tuple[int, "RectArray"]

◆ kRecGetZoneNodeArray()

RECERR RECAPIKRN kRecGetZoneNodeArray	(	HPAGE	hPage,
		IMAGEINDEX	iiImg,
		LPPOINT *	ppPoints,
		int *	pnNodes,
		int	iZone )

Getting the polygon of the user zone.

This function retrieves the polygon made up of the vertices of the user zone. This can be useful for an application with a GUI when drawing irregular zones.

Parameters

[in]	hPage	The handle of the page.
[in]	iiImg	Index of the image in the page, whose coordinate system is used to report the points.
[out]	ppPoints	The pointer of the array of polygon points. This array is allocated by the function and can be freed by calling the function kRecFree.
[out]	pnNodes	The pointer of an integer retrieving the number of polygon vertices.
[in]	iZone	The index of the user zone in question.

Return values

RECERR

Note: Note that the index of the first zone in the zone list is zero (0).; You cannot call this function on OMR zones (see FM_OMR, RM_OMR), because they may have non-pizzabox shapes.; If iiImg is II_ORIGINAL the polygon may have slanting edges due to the deskew operation.; Since this function creates a new memory object containing the nodes, the application should call the kRecFree function to free this memory area after evaluating the result.; The specification of this function in C# is:
RECERR kRecGetZoneNodeArray(IntPtr hPage, IMAGEINDEX iiImg, out POINT[] ppPoints, int nZone);

kRecGetZoneNodeArray
RECERR RECAPIKRN kRecGetZoneNodeArray(HPAGE hPage, IMAGEINDEX iiImg, LPPOINT *ppPoints, int *pnNodes, int iZone)
Getting the polygon of the user zone.; The specification of this function in Java is:
int kRecGetZoneNodeArray(HPAGE hPage, IMAGEINDEX iiImg, PointArray ppPoints, int iZone); The specification of this function in Python is:
def kRecGetZoneNodeArray(hPage: "HPAGE", iiImg: int, iZone: int) -> Tuple[int, "PointArray"]

◆ kRecInitZone()

void RECAPIKRN kRecInitZone ( LPZONE pZone )

Initializing a ZONE variable.

This function initializes a ZONE variable to default values.

Parameters

[in] pZone Pointer to the zone structure to be initialized.

Return values

none

Note: All the fields of the structure are set to a default value:
type = WT_FLOW;

fm = FM_DEFAULT;

rm = RM_AUTO;

filter = FILTER_DEFAULT;

chk_control = 0;

chk_fn = NULL;

chk_sect = "";

userdata = 0;

FILTER_DEFAULT
@ FILTER_DEFAULT
Definition KernelApi.h:731

WT_FLOW
@ WT_FLOW
Definition KernelApi.h:1550

RM_AUTO
@ RM_AUTO
Definition KernelApi.h:606

FM_DEFAULT
@ FM_DEFAULT
Definition KernelApi.h:553

The bounding box (ZONE::rectBBox) is set to 0, the real value must be set after this call.; In a C++ program it might be useful to create a CZone class based on ZONE and call kRecInitZone in the constructor.; The specification of this function in C# is:
RECERR kRecInitZone([In, Out] ZONE zone);

kRecInitZone
void RECAPIKRN kRecInitZone(LPZONE pZone)
Initializing a ZONE variable.; The specification of this function in Java is:
void kRecInitZone(ZONE pZone); The specification of this function in Python is:
def kRecInitZone() -> "ZONE"

◆ kRecInsertZone()

RECERR RECAPIKRN kRecInsertZone	(	HPAGE	hPage,
		IMAGEINDEX	iiImg,
		LPCZONE	pZone,
		int	nZone )

Inserting a user zone.

This function inserts a new zone in the user zone list of the page. After inserting the zone, the zone list will be recalculated automatically. For information about insertion of irregular zones see notes.

Parameters

[in]	hPage	Handle of the page.
[in]	iiImg	Index of the image in the page whose coordinate system you have used in defining the boundary box for the new zone. It is recommended to use II_CURRENT coordinates if possible.
[in]	pZone	Pointer to the zone data to be inserted.
[in]	nZone	Index in the user zone list where the new zone should be inserted. Use zero (0) to insert the zone as the first element of the zone list. To insert a zone as the last element of the zone list, use -1.

Return values

RECERR

Note

The insertion of a new irregular zone can be performed only by rectangles. The first rectangle must be inserted by kRecInsertZone. The additional rectangles must be added by kRecAddZoneRect or removed by kRecSubZoneRect. The rectangle list of the given zone is automatically updated (the overlapping rectangles are recalculated; when zones are subtracted, the remaining (maybe non-rectangular) area is broken into rectangular areas, etc.), thus the ZONE passed to the function kRecInsertZone must have only the bounding box of the irregular zone’s first rectangle (it must not be the bounding box of the whole zone).

When inserting a zone, the application should initialize all the fields of the ZONE structure. The kRecInitZone function could be used for this purpose. The ZONE::rectBBox field does not have a meaningful default value, so it must always be directly initialized after kRecInitZone. If other fields are not changed then a WT_FLOW zone is created this way with default features.

The ZONE::type field of the zone to be inserted can be any ZONETYPE.

For successful zone insertion:

the whole zone area must lie within the page's OCR image area,
the whole zone area must lie within the given image's area,
the top left coordinates of the zone must have lower values than those of the bottom right ones,
the zone must not overlap another zone.

It is the programmer's responsibility to specify a valid recognition module-filling method pair, otherwise any incorrectly set zones will have no recognition result. For more details, see the topic Filling method - Recognition module combinations.

Any change in the user zone list provokes deletion of the OCR zone list.

Insertion of a zone (containing machine printed uppercase flowed text):

HPAGE hPage;
ZONE zone;
kRecInitZone(&zone);
zone.rectBBox.left = 0;
zone.rectBBox.top = 0;
zone.rectBBox.right = 100;
zone.rectBBox.bottom = 200;
zone.fm = FM_OMNIFONT;
zone.rm = RM_OMNIFONT_MOR;
zone.filter = (CHR_FILTER)(FILTER_UPPERCASE | FILTER_DIGIT);
kRecInsertZone(hPage, II_CURRENT, &zone, -1);

The specification of this function in C# is:

RECERR kRecInsertZone(IntPtr hPage, IMAGEINDEX iiImg, [In] ZONE pZone, int nZone);

The specification of this function in Java is:

int kRecInsertZone(HPAGE hPage, IMAGEINDEX iiImg, ZONE pZone, int nZone)

The specification of this function in Python is:

def kRecInsertZone(hPage: "HPAGE", iiImg: int, pZone: "ZONE", nZone: int) -> int

◆ kRecLoadZones()

RECERR RECAPIKRN kRecLoadZones	(	HPAGE	hPage,
		LPCTSTR	pFileName )

Loading user zones.

This function loads the user zone list from a zone file. The function attaches the zone list to the page.

Parameters

[in]	hPage	Handle of the page.
[in]	pFileName	Name of the zone file to be loaded.

Return values

ZONE_SIZE_WARN	At least one zone has been truncated, because it extends beyond the image
ZONE_SIZE_ERR	At least one zone has not been loaded, because it extends beyond the image
RECERR	Other errors

Note: Calling this function removes any zone list (OCR zone list as well) already attached to the page.; This function can load zone files created by versions of OmniPage CSDK earlier than v2025.1.; The specification of this function in C# is:
RECERR kRecLoadZones(IntPtr hPage, string pFileName);

kRecLoadZones
RECERR RECAPIKRN kRecLoadZones(HPAGE hPage, LPCTSTR pFileName)
Loading user zones.; The specification of this function in Java is:
int kRecLoadZones(HPAGE hPage, String pFileName); The specification of this function in Python is:
def kRecLoadZones(hPage: "HPAGE", pFileName: str) -> int

◆ kRecLocateHandwriting()

RECERR RECAPIKRN kRecLocateHandwriting	(	int	sid,
		HPAGE	hPage,
		LOCATEHANDWRITINGMODE	mode,
		LPCRECT	pRect )

Find/remove handwriting on/from the given image.

This function finds the handwriting on the page. Depending on the mode parameter it retains or removes the handwriting. When the handwriting is retained, the function also returns handwriting zones. It is useful when these areas will be recognized by RM_RER or RM_HNR or another handwritten text recognizer. See also kRecCreateHandwritingPage.

Parameters

[in]	sid	Settings Collection ID.
[in]	hPage	Handle of the page.
[in]	mode	Specifies whether to keep or remove handwriting.
[in]	pRect	Pointer to the rectangular area to be detected (II_CURRENT coordinate system is used). If it is NULL the entire image is processed.

Return values

RECERR

Note: See the description about processing handwritten text.; The specification of this function in C# is:
RECERR kRecLocateHandwriting(int sid, IntPtr hPage, LOCATEHANDWRITINGMODE mode, RECT UserRect);

LOCATEHANDWRITINGMODE
LOCATEHANDWRITINGMODE
kRecLocateHandwriting modes
Definition KernelApi.h:3283

kRecLocateHandwriting
RECERR RECAPIKRN kRecLocateHandwriting(int sid, HPAGE hPage, LOCATEHANDWRITINGMODE mode, LPCRECT pRect)
Find/remove handwriting on/from the given image.; The specification of this function in C# Objects is:
Page.LocateHandwriting(LOCATEHANDWRITINGMODE mode, RECT UserRect);; The specification of this function in Java is:
int kRecLocateHandwriting(int sid, HPAGE hPage, LOCATEHANDWRITINGMODE mode, RECT pRect); The specification of this function in Python is:
def kRecLocateHandwriting(sid: int, hPage: "HPAGE", mode: "LOCATEHANDWRITINGMODE", pRect: "RECT") -> int

◆ kRecLocateZones()

RECERR RECAPIKRN kRecLocateZones	(	int	sid,
		HPAGE	hPage )

Page parsing.

This function analyzes the page layout structure of the image (auto-zoning). It finds text or graphic blocks on the page, builds an OCR zone list and then connects it to the page. The user zone list is not changed. It activates the PID_DECOMPOSITION process.

Parameters

[in]	sid	Settings Collection ID.
[in]	hPage	Handle of the page containing the OCR image to be analyzed.

Return values

RECERR

Note: OmniPage Capture SDK uses different auto-zoning algorithms for Latin-alphabet and Asian text. Ensure that the correct language has been set before calling kRecLocateZones (see: kRecSetLanguages, kRecManageLanguages).; OmniPage Capture SDK offers three different algorithms to be applied during auto-zoning of Latin-alphabet text, and two different algorithms during auto-zoning of CCJK and Arabic text; use the kRecSetDecompMethod function to specify the Page parser algorithm. For details, see also IMG_DECOMP.; If hPage does not contain II_BW image, an implicit secondary image conversion step is also performed automatically to convert the image to a despeckled B/W image (which will be stored in II_BW image of hPage) on which the auto-zoning function operates. The parameters for this conversion can be specified through the functions kRecSetImgBinarizationMode, kRecSetImgDespeckleMode, kRecSetImgBrightness, kRecSetImgThreshold, kRecSetImgResolEnhancement and kRecSetRMTradeoff.; If user zones are used together with kRecLocateZones they should be inserted before calling this function.; kRecLocateZones creates the OCR zones and puts them into the OCR zone list. Any previously inserted OCR zone is deleted first. The generated OCR zones are used by the recognition process and they may be modified by this.; The type of user zone determines how kRecLocateZones finds OCR zones inside them. See ZONETYPE for a description of individual types.; The OCR zones detected outside User zones take the value FILTER_DEFAULT, meaning that the zone will take on the global filter setting (which is by default FILTER_ALL to have no filtering).; The OCR zones detected outside User zones will always have the FM_DEFAULT and RM_AUTO values in their ZONE::fm and ZONE::rm fields, respectively. 
The "meaning" of the FM_DEFAULT filling method can be specified with the kRecSetDefaultFillingMethod function, while the "meaning" of the RM_AUTO recognition module can be specified with the kRecSetDefaultRecognitionModule function.; Since the function's zone location algorithm is optimized for searching text blocks, it is not recommended for locating barcodes. This situation can be handled with user zones.; Note that the kRecGetOCRZoneCount function can be used for inquiring the number of OCR zones in the zone list. The application can get the OCR zone parameters by the kRecGetOCRZoneInfo function. After modifying some zone attributes the kRecUpdateOCRZone function can be used to apply the changes.; kRecDetectFillingMethod is a useful function when you do not know the type of page to be processed.; This function can fill the line list of the HPAGE, but not all line situations are handled in the same way. See RLINE for more information.; The specification of this function in C# is:
RECERR kRecLocateZones(int sid, IntPtr hPage);

kRecLocateZones
RECERR RECAPIKRN kRecLocateZones(int sid, HPAGE hPage)
Page parsing.; The specification of this function in C# Objects is:
Page.LocateZones();; The specification of this function in Java is:
int kRecLocateZones(int sid, HPAGE hPage); The specification of this function in Python is:
def kRecLocateZones(sid: int, hPage: "HPAGE") -> int

◆ kRecLocateZonesEx()

RECERR RECAPIKRN kRecLocateZonesEx	(	int	sid,
		HPAGE	hPage,
		int	fm_mode )

Extended page parsing.

This function analyzes the page layout automatically. It searches for areas on the page according to the specified filling modes (fm_mode). Different filling modes require different recognition algorithms. This function produces zones which will be recognized by the proper algorithms even when different filling modes are combined on the page. The parameter fm_mode describes the content type of the page that the caller wants to process. In all other respects, refer to the documentation of the function kRecLocateZones.

Parameters

[in]	sid	Settings Collection ID.
[in]	hPage	Handle of the page containing the OCR image to be analyzed.
[in]	fm_mode	Type(s) of the areas of interest.

Return values

RECERR

Note: If hPage has a user zone, this function will fail and return an error code.; The specification of this function in C# is:
RECERR kRecLocateZonesEx(int sid, IntPtr hPage, int fm_mode);

kRecLocateZonesEx
RECERR RECAPIKRN kRecLocateZonesEx(int sid, HPAGE hPage, int fm_mode)
Extended page parsing.; The specification of this function in C# Objects is:
Page.LocateZonesEx(int fm_mode);; The specification of this function in Java is:
int kRecLocateZonesEx(int sid, HPAGE hPage, int fm_mode); The specification of this function in Python is:
def kRecLocateZonesEx(sid: int, hPage: "HPAGE", fm_mode: int) -> int

◆ kRecSaveOCRZones()

RECERR RECAPIKRN kRecSaveOCRZones	(	HPAGE	hPage,
		LPCTSTR	pFileName )

Saving the OCR zone list.

This function saves the current OCR zone list of the page into a zone file. The zone file can be loaded later by kRecLoadZones as user zones.

Parameters

[in]	hPage	Handle of the page.
[in]	pFileName	Name of the zone file to be created.

Return values

RECERR

Note: This function overwrites a pre-existing zone file with the same name.; The zone coordinates are saved into the zone file in twips (1/1440 inches).; The specification of this function in C# is:
RECERR kRecSaveOCRZones(IntPtr hPage, string pFileName);

kRecSaveOCRZones
RECERR RECAPIKRN kRecSaveOCRZones(HPAGE hPage, LPCTSTR pFileName)
Saving the OCR zone list.; The specification of this function in Java is:
int kRecSaveOCRZones(HPAGE hPage, String pFileName); The specification of this function in Python is:
def kRecSaveOCRZones(hPage: "HPAGE", pFileName: str) -> int

◆ kRecSaveZones()

RECERR RECAPIKRN kRecSaveZones	(	HPAGE	hPage,
		LPCTSTR	pFileName )

Saving the user zone list.

This function saves the current user zone list of the page into a zone file.

Parameters

[in]	hPage	Handle of the page.
[in]	pFileName	Name of the zone file to be created.

Return values

RECERR

Note: This function overwrites a pre-existing zone file with the same name.; The zone coordinates are saved into the zone file in twips (1/1440 inches).; The specification of this function in C# is:
RECERR kRecSaveZones(IntPtr hPage, string pFileName);

kRecSaveZones
RECERR RECAPIKRN kRecSaveZones(HPAGE hPage, LPCTSTR pFileName)
Saving the user zone list.; The specification of this function in Java is:
int kRecSaveZones(HPAGE hPage, String pFileName); The specification of this function in Python is:
def kRecSaveZones(hPage: "HPAGE", pFileName: str) -> int

◆ kRecSetDecompMethod()

RECERR RECAPIKRN kRecSetDecompMethod	(	int	sid,
		IMG_DECOMP	Algorithm )

Setting the decomp method.

This function specifies the Page parser algorithm setting of the Engine. This setting is applied whenever the auto-zoning algorithm is activated (PID_DECOMPOSITION process).

Parameters

[in]	sid	Settings Collection ID.
[in]	Algorithm	The page parser algorithm to be set.

Return values

RECERR

Note: If this function is not called, the default value, DCM_AUTO is applied.; This function sets the value of the setting Kernel.Decomp.Method. This setting can be retrieved by kRecGetDecompMethod.; The specification of this function in C# is:
RECERR kRecSetDecompMethod(int sid, IMG_DECOMP decompAlg);

kRecSetDecompMethod
RECERR RECAPIKRN kRecSetDecompMethod(int sid, IMG_DECOMP Algorithm)
Setting the decomp method.; The specification of this function in Java is:
int kRecSetDecompMethod(int sid, IMG_DECOMP Algorithm); The specification of this function in Python is:
def kRecSetDecompMethod(sid: int, Algorithm: int) -> int

◆ kRecSetForceSingleColumn()

RECERR RECAPIKRN kRecSetForceSingleColumn	(	int	sid,
		INTBOOL	bForceSingle )

Specify the Force Single Column mode.

This function specifies the Force Single Column mode for the page-layout PID_DECOMPOSITION process. It prevents the Engine's de-columnization from detecting columns and placing their contents one below the other. It is useful for conserving the columnar structure in tables.

Parameters

[in]	sid	Settings Collection ID.
[in]	bForceSingle	Force Single Column mode to be set (default is FALSE).

Return values

RECERR

Note: It is highly recommended to switch this on sparingly, only for cases where the image has a single tabular structure and chances are that the automatic decomposition would fail in analyzing the layout properly.; Single Column mode is handled in a different way in DCM_STANDARD mode. In this case, the "single column" is only a recommendation to the Decomposition Module, which may still generate multi-column results if this seems necessary (e.g. if the page parser finds more than one column, there are text and picture side by side, or texts are separated by vertical lines, etc.); This mode takes effect only when the page descriptor has its default value (kRecSetPageDescription).; This function sets the value of the setting Kernel.Decomp.ForceSingleColumn. This setting can be retrieved by kRecGetForceSingleColumn.; The specification of this function in C# is:
RECERR kRecSetForceSingleColumn(int sid, bool bEnable);

kRecSetForceSingleColumn
RECERR RECAPIKRN kRecSetForceSingleColumn(int sid, INTBOOL bForceSingle)
Specify the Force Single Column mode.; The specification of this function in Java is:
int kRecSetForceSingleColumn(int sid, int bForceSingle); The specification of this function in Python is:
def kRecSetForceSingleColumn(sid: int, bForceSingle: bool) -> int

◆ kRecSetNongriddedTableDetect()

RECERR RECAPIKRN kRecSetNongriddedTableDetect	(	int	sid,
		INTBOOL	bEnable )

Setting the non-gridded table detection.

This function sets the Non-gridded table detection setting of the Engine. Tables with visible grid lines (gridded tables) in an original page can usually be detected successfully by the auto-zoning function. In contrast, tables without visible cell separators (non-gridded tables) in the original are harder to identify as tables, because they might also be word lists or data arranged in columns. The OmniPage CSDK offers an algorithm for detecting such non-gridded tables more confidently. This feature of the Engine can only be used in conjunction with an auto-zoning step. The algorithm is based on the result of the character recognition and runs on the OCR zones created by auto-zoning (including the zones created from a WT_AUTO User zone).

Parameters

[in]	sid	Settings Collection ID.
[in]	bEnable	The value to be set for the non-gridded table detection setting (the default is `TRUE`).

Return values

RECERR

Note: The detection of non-gridded tables is not supported with the following recognition languages: Arabic, Simplified Chinese, Traditional Chinese, Hebrew, Japanese, and Korean.; When this feature is enabled, a special algorithm tries to find non-gridded table(s) on the image. This algorithm is based on the existing OCR zone list of the image and also on the recognized characters, i.e. this algorithm is activated as one of the last steps in the character recognition. Because of this, its effect might be visible only after a kRecRecognize or kRecProcessPages call.; The running of this algorithm is strongly controlled, since it may change the OCR zone list belonging to the image.; It is recommended to disable the Non-gridded table detection algorithm when the DCM_FAST page parser algorithm has been selected (see: kRecSetDecompMethod).; This function sets the value of the setting Kernel.Decomp.EnableNonGrid. This setting can be retrieved by kRecGetNongriddedTableDetect.; The specification of this function in C# is:
RECERR kRecSetNongriddedTableDetect(int sid, bool bEnable);

kRecSetNongriddedTableDetect
RECERR RECAPIKRN kRecSetNongriddedTableDetect(int sid, INTBOOL bEnable)
Setting the non-gridded table detection.; The specification of this function in Java is:
int kRecSetNongriddedTableDetect(int sid, int bEnable); The specification of this function in Python is:
def kRecSetNongriddedTableDetect(sid: int, bEnable: bool) -> int

◆ kRecSetPageDescription()

RECERR RECAPIKRN kRecSetPageDescription	(	int	sid,
		DWORD	PageDesc )

Setting page description data.

The page description data controls how page parsing (see kRecLocateZones and kRecRecognize) runs on the page. The Page Descriptor defines describe the different page-parsing behaviors. If the program has information about the image, it can help the page parser achieve better layout results.

Parameters

[in]	sid	Settings Collection ID.
[in]	PageDesc	The Page Descriptor. It contains a set of LZ_ flags

Return values

RECERR

Note: This function sets the value of the setting Kernel.Decomp.PageDesc. This setting can be retrieved by kRecGetPageDescription.; The specification of this function in C# is:
RECERR kRecSetPageDescription(int sid, PAGEDESCRIPTION PageDesc);

kRecSetPageDescription
RECERR RECAPIKRN kRecSetPageDescription(int sid, DWORD PageDesc)
Setting page description data.; The specification of this function in Java is:
int kRecSetPageDescription(int sid, long PageDesc); The specification of this function in Python is:
def kRecSetPageDescription(sid: int, PageDesc: int) -> int

◆ kRecSetZoneBorder()

RECERR RECAPIKRN kRecSetZoneBorder	(	HPAGE	hPage,
		int	nZone,
		ZONE_BORDER_TYPE	ZoneBorderType,
		int	CellCount )

Setting the border type of a zone.

This function sets the border type of a hand-print zone. The filling method of the zone must be FM_HANDPRINT or FM_AUTO_HAND.

Parameters

[in]	hPage	Handle of the page.
[in]	nZone	Index of the user zone to be updated.
[in]	ZoneBorderType	Zone border type, see ZONE_BORDER_TYPE.
[in]	CellCount	Number of comb cells or boxes in the zone. 0 means automatic detection.

Return values

RECERR

Note: If you want to recognize the text in a comb, you can define the type of the borders, which helps improve accuracy.; The index of the first zone in the zone list is zero (0).; If the CellCount parameter is 0, then CSDK will detect the number of cells automatically.; The CellCount is not used in zone border types that do not define combs or boxes.; The specification of this function in C# is:
RECERR kRecSetZoneBorder(IntPtr hPage, int nZone, ZONE_BORDER_TYPE ZoneBorder, int CellCount);

kRecSetZoneBorder
RECERR RECAPIKRN kRecSetZoneBorder(HPAGE hPage, int nZone, ZONE_BORDER_TYPE ZoneBorderType, int CellCount)
Setting the border type of a zone.; The specification of this function in Java is:
int kRecSetZoneBorder(HPAGE hPage, int nZone, ZONE_BORDER_TYPE ZoneBorder, int CellCount); The specification of this function in Python is:
def kRecSetZoneBorder(hPage: "HPAGE", nZone: int, ZoneBorderType: int, CellCount: int) -> int

◆ kRecSetZoneLayout()

RECERR RECAPIKRN kRecSetZoneLayout	(	HPAGE	hPage,
		IMAGEINDEX	iiImg,
		LPCRECT	pRects,
		int	nRects,
		int	nZone )

Updating the user zone shape information.

This function updates the shape information of any zone in the user zone list. See also the definition of pizza-box for more information about zone shapes.

Parameters

[in]	hPage	Handle of the page.
[in]	iiImg	Index of the image in the page whose coordinate system you have used in defining the shape to be updated.
[in]	pRects	Array of RECTs for storing the shape information.
[in]	nRects	The number of RECTs in the shape information array.
[in]	nZone	Index of the zone to be updated.

Return values

RECERR

Note: The index of the first zone in the zone list is zero (0).; Any change in the user zone list provokes deletion and regeneration of the OCR zone list.; The specification of this function in C# is:
RECERR kRecSetZoneLayout(IntPtr hPage, IMAGEINDEX iiImg, RECT[] pRects, int nZone);

kRecSetZoneLayout
RECERR RECAPIKRN kRecSetZoneLayout(HPAGE hPage, IMAGEINDEX iiImg, LPCRECT pRects, int nRects, int nZone)
Updating the user zone shape information.; The specification of this function in Java is:
int kRecSetZoneLayout(HPAGE hPage, IMAGEINDEX iiImg, RECT pRects, int nRects, int nZone); The specification of this function in Python is:
def kRecSetZoneLayout(hPage: "HPAGE", iiImg: int, pRects: "RectArray", nZone: int) -> int

◆ kRecSubZoneRect()

RECERR RECAPIKRN kRecSubZoneRect	(	HPAGE	hPage,
		IMAGEINDEX	iiImg,
		const RECT *	pRect,
		int	nZone )

Subtracting a rectangle from a user zone.

This function subtracts a rectangle from an existing user zone. The function recalculates the rectangle list of the zone. The resulting list must describe a pizza-box shape. Subtraction cannot be performed on table zones and vertical text zones.

Parameters

[in]	hPage	Handle of the page.
[in]	iiImg	The index of the image whose coordinate system you have used in defining the rectangle to be subtracted. It is recommended to use II_CURRENT coordinates if possible.
[in]	pRect	The rectangle to be subtracted from the given user zone.
[in]	nZone	The index of the user zone the rectangle is subtracted from.

Return values

RECERR

Note: The insertion of a new irregular zone can be performed only by rectangles. The first rectangle must be inserted by kRecInsertZone. The additional rectangles must be added by kRecAddZoneRect or removed by kRecSubZoneRect. The rectangle list of the given zone is automatically updated (the overlapping rectangles are recalculated; when subtracting, the remaining (maybe non-rectangular) area is broken into rectangular areas, etc.), thus the ZONE passed to the function kRecInsertZone must have only the bounding box of the irregular zone’s first rectangle (it must not be the bounding box of the whole zone).; Note that the index of the first zone in the zone list is zero (0).; Any change in the user zone list provokes deletion of the OCR zone list.; The specification of this function in C# is:
RECERR kRecSubZoneRect(IntPtr hPage, IMAGEINDEX iiImg, RECT pRect, int nZone);

kRecSubZoneRect
RECERR RECAPIKRN kRecSubZoneRect(HPAGE hPage, IMAGEINDEX iiImg, const RECT *pRect, int nZone)
Subtracting a rectangle from a user zone.; The specification of this function in Java is:
int kRecSubZoneRect(HPAGE hPage, IMAGEINDEX iiImg, RECT pRect, int nZone); The specification of this function in Python is:
def kRecSubZoneRect(hPage: "HPAGE", iiImg: int, pRect: "RECT", nZone: int) -> int

◆ kRecUpdateOCRZone()

RECERR RECAPIKRN kRecUpdateOCRZone	(	HPAGE	hPage,
		IMAGEINDEX	iiImg,
		LPCZONE	pZone,
		int	nZone )

Updating the OCR zone.

This function updates the zone data of any zone in the OCR zone list. The fields ZONE::rectBBox, ZONE::type, ZONE::chk_fn (must be NULL) and ZONE::chk_sect (must be an empty string) cannot be modified.

Parameters

[in]	hPage	Handle of the page.
[in]	iiImg	Index of the image in the page, whose coordinate system you have used in defining the zone's boundary box.
[in]	pZone	Pointer to a zone structure with the necessary updating zone data.
[in]	nZone	Index of the zone to be updated.

Return values

RECERR

Note

The index of the first zone in the zone list is zero (0).

This function can modify only the following fields of the specified OCR zone: userdata, fm, rm, filter, chk_control. Other fields of the passed zone structure are not considered.

The preferred method of updating a zone is the following:

call kRecGetOCRZoneInfo to get the current parameters of the zone,
modify the zone parameters,
call the kRecUpdateOCRZone function.

It is the programmer's responsibility to specify a valid recognition module-filling method pair, otherwise any incorrectly set zones will have no recognition result. For more details see the topic Filling method - Recognition module combinations.

The specification of this function in C# is:

RECERR kRecUpdateOCRZone(IntPtr hPage, IMAGEINDEX iiImg, [In] ZONE pZone, int nZone);

kRecUpdateOCRZone

RECERR RECAPIKRN kRecUpdateOCRZone(HPAGE hPage, IMAGEINDEX iiImg, LPCZONE pZone, int nZone)

Updating the OCR zone.

The specification of this function in Java is:

int kRecUpdateOCRZone(HPAGE hPage, IMAGEINDEX iiImg, ZONE pZone, int nZone)

The specification of this function in Python is:

def kRecUpdateOCRZone(hPage: "HPAGE", iiImg: int, pZone: "ZONE", nZone: int) -> int

◆ kRecUpdateZone()

RECERR RECAPIKRN kRecUpdateZone	(	HPAGE	hPage,
		IMAGEINDEX	iiImg,
		LPCZONE	pZone,
		int	nZone )

Updating a user zone.

This function updates the zone data of any zone in the user zone list.

Parameters

[in]	hPage	Handle of the page.
[in]	iiImg	Index of the image in the page whose coordinate system you have used in defining the zone's boundary box.
[in]	pZone	Pointer to a zone structure with the necessary updating zone data.
[in]	nZone	Index of the zone to be updated.

Return values

RECERR

Note

The index of the first zone in the zone list is zero (0).

When updating a zone, the application should initialize all the fields of pZone either by the kRecInitZone or, in most cases, the kRecGetZoneInfo functions.

If the new ZONE::rectBBox is equal to the old one, the layout of the zone (kRecGetZoneLayout) is not changed. Otherwise it is modified to a simple rectangular zone equal to the new rectBBox (i.e. the one or more previous rectangles that made up the zone are deleted and only the single rectBBox is stored).

The preferred method of updating a zone is the following:

call kRecGetZoneInfo to get the current parameters of the zone,
modify the zone parameters,
call the kRecUpdateZone function.

It is the programmer's responsibility to specify a valid recognition module-filling method pair. Otherwise any incorrectly set zones will have no recognition result. For more details see the topic Filling method - Recognition module combinations.

When updating a table-type zone the "cell-detection" algorithm will not be activated, which results in improper table detection within the zone. You can detect cells within a table zone by calling kRecLocateTable.

Any change in the user zone list provokes deletion of the OCR zone list.

The specification of this function in C# is:

RECERR kRecUpdateZone(IntPtr hPage, IMAGEINDEX iiImg, [In] ZONE pZone, int nZone);

kRecUpdateZone

RECERR RECAPIKRN kRecUpdateZone(HPAGE hPage, IMAGEINDEX iiImg, LPCZONE pZone, int nZone)

Updating a user zone.

The specification of this function in Java is:

int kRecUpdateZone(HPAGE hPage, IMAGEINDEX iiImg, ZONE pZone, int nZone)

The specification of this function in Python is:

def kRecUpdateZone(hPage: "HPAGE", iiImg: int, pZone: "ZONE", nZone: int) -> int

Topics

Classes

Macros

Typedefs

Enumerations

Functions

Bitmasks of checking control

Filling modes for extended page parsing

Page Descriptor defines

Detailed Description

Auto-zoning

Adding zones manually

Adding zones from a zone file

Usage of Page Descriptor

Other related information

Detection of filling method of zones

Vertical text support

Macro Definition Documentation

◆ LZ_FORM

Typedef Documentation

◆ ZONE

Enumeration Type Documentation

◆ CREATEHANDWRINGPAGEMODE

◆ FILLINGMETHOD

◆ IMG_DECOMP

◆ LOCATEHANDWRITINGMODE

◆ RECOGNITIONMODULE

◆ ZONE_BORDER_TYPE

◆ ZONETYPE

Function Documentation

◆ kRecAddZoneRect()

◆ kRecCopyOCRZones()

◆ kRecCreateHandwritingPage()

◆ kRecDeleteAllZones()

◆ kRecDeleteZone()

◆ kRecGetDecompMethod()

◆ kRecGetForceSingleColumn()

◆ kRecGetNongriddedTableDetect()

◆ kRecGetOCRZoneCount()

◆ kRecGetOCRZoneData()

◆ kRecGetOCRZoneInfo()

◆ kRecGetOCRZoneLayout()

◆ kRecGetOCRZoneNodeArray()

◆ kRecGetPageDescription()

◆ kRecGetZoneBorder()

◆ kRecGetZoneCount()

◆ kRecGetZoneInfo()

◆ kRecGetZoneLayout()

◆ kRecGetZoneNodeArray()

◆ kRecInitZone()

◆ kRecInsertZone()

◆ kRecLoadZones()

◆ kRecLocateHandwriting()

◆ kRecLocateZones()

◆ kRecLocateZonesEx()

◆ kRecSaveOCRZones()

◆ kRecSaveZones()

◆ kRecSetDecompMethod()

◆ kRecSetForceSingleColumn()

◆ kRecSetNongriddedTableDetect()

◆ kRecSetPageDescription()

◆ kRecSetZoneBorder()

◆ kRecSetZoneLayout()

◆ kRecSubZoneRect()

◆ kRecUpdateOCRZone()

◆ kRecUpdateZone()