MeOCRLib.Puma DLL
API Reference
This is the interface
for the meocr100.dll. Included are four example projects: two are written in C#
and the other two in VB.NET. Please refer to the examples along with this document
to implement the library in your application. As you can see in the sample code
below, MeOCRLib has been designed to be as simple as
possible to use. Both examples below are fully functional programs. Most of the
code is boilerplate. The actual code that calls MeOCRLib
is contained in just a few calls:
C#:
Puma ocrPuma = new Puma();           // Declare a new instance of the engine
ocrPuma.Init();                      // Initialize the engine
rText = ocrPuma.Recognize(imgPuma);  // Recognize the image and return the text output
VB.NET:
Dim ocrPuma As New MeOCRLib.Puma     ' Declare a new instance of the engine
ocrPuma.Init()                       ' Initialize the engine
rText = ocrPuma.Recognize(imgPuma)   ' Recognize the image and return the text output
|
Below
are two of the examples provided:
C# Example:
namespace CSharpEx_01
{
    /// <summary>
    /// Sample form that runs a single OCR pass over Sample_02.tif when it loads.
    /// </summary>
    public partial class frmCSharpEx_01 : Form
    {
        public frmCSharpEx_01()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Load handler: initializes the engine, recognizes the sample image as
        /// English formatted text, then closes the engine.
        /// </summary>
        private void frmCSharpEx_01_Load(object sender, EventArgs e)
        {
            // Declare a new instance of the engine
            Puma ocrPuma = new Puma();

            // Declare a few needed variables
            string rText;
            string FileName;

            // The sample image is looked up in the directory of the first
            // command-line argument (the program path).
            FileName = System.IO.Path.Combine(
                System.IO.Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]),
                "Sample_02.tif");

            // Initialize the engine
            ocrPuma.Init();
            try
            {
                // Load the image; 'using' disposes it once recognition is done
                using (Image imgPuma = Image.FromFile(FileName))
                {
                    // Set the language
                    ocrPuma.Language = 0;      // 0 for English

                    // Set output format
                    ocrPuma.OutputFormat = 2;  // Formatted text

                    // Recognize the image
                    rText = ocrPuma.Recognize(imgPuma);
                }
            }
            finally
            {
                // Close the engine, even if loading or recognition throws
                ocrPuma.Close();
            }
        }
    }
}
|
VB.NET Example:
''' <summary>
''' Sample form that runs a single OCR pass over sample_02.tif when it loads.
''' </summary>
Public Class VBasicEx_01
    ''' <summary>
    ''' Load handler: initializes the engine, recognizes the sample image as
    ''' English formatted text, then closes the engine.
    ''' </summary>
    Private Sub VBasicEx_01_Load(sender As Object, e As EventArgs) Handles MyBase.Load
        ' Declare a new instance of the engine
        Dim ocrPuma As New MeOCRLib.Puma

        ' Declare a few needed variables
        Dim rText As String
        Dim FileName As String

        ' The sample image is looked up in the directory of the first
        ' command-line argument (the program path).
        FileName = System.IO.Path.Combine(
            System.IO.Path.GetDirectoryName(Environment.GetCommandLineArgs()(0)),
            "sample_02.tif")

        ' Initialize the engine
        ocrPuma.Init()
        Try
            ' Load the image; Using disposes it once recognition is done
            Using imgPuma As Image = Image.FromFile(FileName)
                ' Set the language
                ocrPuma.Language = 0      ' 0 for English

                ' Set output format
                ocrPuma.OutputFormat = 2  ' Formatted text

                ' Recognize the image
                rText = ocrPuma.Recognize(imgPuma)
            End Using
        Finally
            ' Close the engine, even if loading or recognition throws
            ocrPuma.Close()
        End Try
    End Sub
End Class
|
Class Constructor:
public Puma()
Description:
Used
to create a new instance of Puma OCR Engine.
Use:
Puma ocrPuma = new Puma();
Class Members:
public bool Init(MeOCRLib.Puma.ProgressStepPtr pStep
= null)
Description:
Used
to initialize a new instance of Puma OCR Engine. Must be called right after the new
instance is created.
Parameters:
pStep: is an optional parameter
and is the name of your callback delegate function for displaying the OCR progress. See documentation and
examples of ProgressStepPtr below for details.
Use:
ocrPuma.Init(pStep);
public void Close()
Description:
Used to close an instance of Puma OCR Engine. Should be called at
program exit to clean up.
Use:
ocrPuma.Close();
public string Recognize(System.Drawing.Image SrcImg,
[System.Drawing.Rectangle sRect
= null])
Description:
Used
to recognize an image.
Parameters:
SrcImg: is a System.Drawing.Image object
of the image.
sRect: is a System.Drawing.Rectangle object
containing the coordinates of a zone to recognize rather than the whole image. This parameter is optional.
If it is not specified then the entire image is recognized.
Use:
string txt = ocrPuma.Recognize(imgPuma);
public string Recognize(System.Drawing.Bitmap SrcBmp,
[System.Drawing.Rectangle sRect
= null])
Description:
Used
to recognize an image.
Parameters:
SrcBmp: is a System.Drawing.Bitmap object
of the image.
sRect: is a System.Drawing.Rectangle object
containing the coordinates of a zone to recognize rather than the whole image. This
parameter is optional. If it is not specified then the entire image is recognized.
Use:
string txt = ocrPuma.Recognize(bmpPuma);
public string LanguageString(short index)
Description:
Used
to get the name of the language that is assigned to the numeric value in index.
Parameters:
index: is the numeric index of the
language.
Use:
string language = ocrPuma.LanguageString(index);
Class Properties:
public short Language { set; get; }
Description:
Used
to get or set the recognition language. Below is a list of values for specific
languages supported.
Languages
codes:
ENGLISH 0
GERMAN 1
FRENCH 2
RUSSIAN 3
SWEDISH 4
SPANISH 5
ITALIAN 6
RUS_ENG 7
UKRAINIAN 8
SERBIAN 9
CROATIAN 10
POLISH 11
DANISH 12
PORTUGUESE 13
DUTCH 14
DIG 15
UZBEK 16
KAZ 17
KAZ_ENG 18
CZECH 19
ROMAN 20
HUNGAR 21
BULGAR 22
SLOVENIAN 23
LATVIAN 24
LITHUANIAN 25
ESTONIAN 26
TURKISH 27
Use:
ocrPuma.Language = 0;
public short OutputFormat
{ set; get; }
Description:
Used
to get or set the recognized text output format.
Output
format codes:
RAW 0
PLAIN 1
FORMATTED 2
RTF 3
Use:
ocrPuma.OutputFormat = 0;
public short UnrecogChar
{ set; get; }
Description:
Used
to get or set the character ASCII code to be used when a character is unrecognized.
Use:
ocrPuma.UnrecogChar = 128;
Class Delegate:
public delegate void ProgressStepPtr(int step, string stepName,
int lPercent)
Description:
The
callback delegate used as an event to update the OCR progress.
Use:
ocrPuma.Init(pStep);
// Progress callback with the ProgressStepPtr signature; passed to Init above.
void pStep(int step, string stepName, int lPercent)
{
    // Update the progress
}
|
Format Descriptions:
Plain: This format outputs plain
text with no structure or formatting.
Formatted: This
format outputs text with formatting preserved.
RTF: This format outputs text
with formatting preserved and rtf codes.
Raw: This format specifies the
coordinates and attributes for each character. Refer to Figure 1 below as you read this section.
Each character's information is
delimited by a carriage return (\r
or vbCr).
Each property member of a character is
delimited by ASCII code (01).
The first item in the format data is the number of records in the data.
In the example below there are 10 character records in the data.
Each record consists of 8 values as follows:
1- Character value
2 - Left position in pixels
3
- Top position in pixels
4 - Right position in pixels
5 - Bottom position in pixels
6 - Internal Value
7 - Internal Value
8 - Internal
Value
If the "Character value"
is a {32) (ASCII code 32, a space), it marks the end of a word.
If the "Character value"
is a {02) (ASCII code 2), it marks the end of a line fragment. A line
fragment is a group of words on a line separated by single spaces. In the line below there are 3 line fragments. Notice there is more than one space
between the line fragments.
This is line fragment 1     This is line fragment 2     This is line fragment 3
Figure 1.
10(\r or vbCr) <--------------------------------- Specifies
the number of records to follow.
E(01)665(01)201(01)697(01)236(01)22(01)4(01)1(\r
or vbCr)
n(01)700(01)209(01)733(01)235(01)22(01)4(01)1(\r
or vbCr)
d(01)736(01)198(01)771(01)236(01)22(01)4(01)1(\r
or vbCr)
{32) (01)0(01)0(01)0(01)0(01)0(01)0(01)0(\r or vbCr)
h(01)771(01)209(01)802(01)236(01)22(01)4(01)1(\r
or vbCr)
o(01)807(01)209(01)838(01)235(01)22(01)4(01)1(\r
or vbCr)
r(01)843(01)209(01)871(01)236(01)22(01)4(01)1(\r
or vbCr)
s(01)876(01)208(01)907(01)235(01)22(01)4(01)1(\r
or vbCr)
e(01)908(01)208(01)946(01)234(01)22(01)4(01)1(\r
or vbCr)
{32) (01)0(01)0(01)0(01)0(01)0(01)0(01)0(\r or vbCr)
{02) (01)0(01)0(01)0(01)0(01)0(01)0(01)0(\r or vbCr)
Here
is some sample code for parsing the Raw Format. This code can also be found in
the example programs provided:
C# Example:
//-----------------------------------------------------------
//Demonstrates parsing of Raw Format
//-----------------------------------------------------------
/// <summary>
/// Walks Raw-format OCR output record by record.
/// </summary>
/// <param name="chars">
/// Raw-format text: records are separated by '\r', the fields inside a record
/// by the control character 0x01, and the first record is the record count.
/// Null or empty input is ignored.
/// </param>
/// <exception cref="System.FormatException">
/// The record count or one of the four coordinates is not a valid integer.
/// </exception>
void ParseResults(string chars)
{
    // Nothing to parse; the null check avoids a NullReferenceException on Split.
    if (string.IsNullOrEmpty(chars))
    {
        return;
    }

    int numItems = -1;
    foreach (string Tok in chars.Split('\r'))
    {
        if (numItems == -1)
        {
            // The first record holds the number of records in the data.
            numItems = Convert.ToInt32(Tok);
            continue;
        }

        string[] TokData = Tok.Split('\x01');
        if (TokData[0] == "")
        {
            // Empty record, e.g. the one produced by the trailing '\r'.
            continue;
        }

        switch (TokData[0][0])
        {
            case ' ':    // Specifies the end of a word.
                break;
            case '\x02': // Specifies the end of a line fragment.
                break;
            default:     // OCR character
                if (TokData.Length < 5)
                {
                    // Record is too short to carry the four coordinates; skip it
                    // instead of indexing past the end of the array.
                    break;
                }
                char OCRChar = TokData[0][0];
                int Left = Convert.ToInt32(TokData[1]);
                int Top = Convert.ToInt32(TokData[2]);
                int Right = Convert.ToInt32(TokData[3]);
                int Bottom = Convert.ToInt32(TokData[4]);
                // Use OCRChar and its bounding box (in pixels) here.
                break;
        }
    }
}
|
VB.NET Example:
'-----------------------------------------------------------
'Demonstrates parsing of Raw Format
'-----------------------------------------------------------
''' <summary>
''' Walks Raw-format OCR output record by record.
''' </summary>
''' <param name="chars">
''' Raw-format text: records are separated by a carriage return, the fields
''' inside a record by the control character ChrW(1), and the first record is
''' the record count. Nothing or an empty string is ignored.
''' </param>
Private Sub ParseResults(chars As String)
    If String.IsNullOrEmpty(chars) Then
        Return
    End If

    Dim numItems As Integer = -1
    For Each Tok As String In chars.Split(ControlChars.Cr)
        If numItems = -1 Then
            ' The first record holds the number of records in the data.
            numItems = Convert.ToInt32(Tok)
            Continue For
        End If

        Dim TokData As String() = Tok.Split(ChrW(1))
        If TokData(0) = "" Then
            ' Empty record, e.g. the one produced by the trailing carriage return.
            Continue For
        End If

        Select Case TokData(0)(0)
            Case " "c
                'Specifies the end of a word.
            Case ChrW(2)
                'Specifies the end of a line fragment.
            Case Else
                'OCR character
                If TokData.Length >= 5 Then
                    Dim OCRChar As Char = TokData(0)(0)
                    Dim Left As Integer = Convert.ToInt32(TokData(1))
                    Dim Top As Integer = Convert.ToInt32(TokData(2))
                    Dim Right As Integer = Convert.ToInt32(TokData(3))
                    Dim Bottom As Integer = Convert.ToInt32(TokData(4))
                    ' Use OCRChar and its bounding box (in pixels) here.
                End If
                ' Records too short to carry the four coordinates are skipped
                ' instead of indexing past the end of the array.
        End Select
    Next
End Sub
|