Use table detection results
When table detection is performed for a document, all results are saved and are available in the XDoc. As a result, a script can read and manipulate those results as needed.
This use case requires that table detection is enabled, but no training documents are required. Instead, the script uses the information from the table detection results to locate a specific table.
In the following example, the script looks for a table that has the phrase "operating expenses" in the text line directly above the topmost header cell of the table. The script displays the start page index of that table in a message box.
' Event handler: hands the document to FindOpExTable, which inspects
' the table detection results stored in the XDoc.
Private Sub Document_AfterExtract(ByVal pXDoc As CASCADELib.CscXDocument)
    Call FindOpExTable(pXDoc)
End Sub
' Walks through every table in the default representation of pXDoc and
' reports the first one whose preceding text line contains the phrase
' "operating expenses" (compared in lower case).
'
' For each table, the topmost header cell on the table's start page is
' located; the text line just before the line of that cell's first word
' is then checked. On the first match a message box is shown and the
' procedure exits. If nothing matches, the procedure ends silently.
Private Sub FindOpExTable(ByVal pXDoc As CASCADELib.CscXDocument)
    Dim tableIdx As Long
    Dim detectedTable As CscXDocTable
    Dim firstPage As Long
    Dim headerIdx As Long
    Dim lineAbove As Long
    Dim captionText As String

    For tableIdx = 0 To pXDoc.Representations.Default.Tables.Count - 1
        Set detectedTable = pXDoc.Representations.Default.Tables(tableIdx)
        firstPage = detectedTable.StartPage

        ' -1 means no header cell was found on the start page
        headerIdx = TopmostHeaderCellOnPage(detectedTable, firstPage)

        If headerIdx > -1 Then
            ' The cell needs at least one word so that a line index can be read from it
            If detectedTable.HeaderCells(headerIdx).PageWords(firstPage).Count > 0 Then
                ' Line index of the cell's first word, minus one = the line before it
                lineAbove = detectedTable.HeaderCells(headerIdx).PageWords(firstPage).Item(0).LineIndex - 1

                ' A negative value means there is no preceding line to inspect
                If lineAbove >= 0 Then
                    captionText = LCase(pXDoc.TextLines(lineAbove).Text)
                    If InStr(captionText, "operating expenses") > 0 Then
                        ' NOTE(review): StartPage is displayed as-is; if it is a zero-based
                        ' index the number shown is one less than the printed page - confirm.
                        MsgBox("Found Operating Expenses table on page " & CStr(firstPage))
                        Exit Sub
                    End If
                End If
            End If
        End If
    Next tableIdx
End Sub

' Returns the index of the header cell of tbl with the smallest Top value
' on page pageIdx, or -1 if no header cell qualifies. Only cells for which
' IsRepeatedOnPage(pageIdx) is true are considered.
Private Function TopmostHeaderCellOnPage(ByVal tbl As CscXDocTable, ByVal pageIdx As Long) As Long
    Dim cellIdx As Long
    Dim cellTop As Long
    Dim bestTop As Long
    Dim bestIdx As Long

    bestIdx = -1
    ' Sentinel: any cell must have a Top below this value to be selected
    bestTop = 9999999

    For cellIdx = 0 To tbl.HeaderCells.Count - 1
        If tbl.HeaderCells(cellIdx).IsRepeatedOnPage(pageIdx) Then
            cellTop = tbl.HeaderCells(cellIdx).PageBoundingBoxes.ItemByPage(pageIdx).Top
            ' Strict comparison keeps the first cell when several share the same Top
            If cellTop < bestTop Then
                bestTop = cellTop
                bestIdx = cellIdx
            End If
        End If
    Next cellIdx

    TopmostHeaderCellOnPage = bestIdx
End Function