Platforms to show: All Mac Windows Linux Cross-Platform

/OCR/tesseract5/Tesseract5


Required plugins for this example: MBS OCR Plugin

You can find this example project in your plugin download, as a Xojo project file inside the examples folder: /OCR/tesseract5/Tesseract5

This example is the version from Sun, 2nd Jul 2022.

Project "Tesseract5.xojo_binary_project"
Class App Inherits Application
// Menu captions referenced from the menu bar via "#App.<name>".
Const kEditClear = "&Delete"
Const kFileQuit = "&Quit"
Const kFileQuitShortcut = ""
EventHandler Sub Open()
  // Application start: open one OCR window per sample image.

  // First sample: recognised with four languages at once.
  Dim multiLanguageImage As FolderItem = MainWindow.FindFile("eurotext.tif")
  Dim multiLanguageWindow As New MainWindow
  multiLanguageWindow.Init multiLanguageImage, "eng+deu+spa+fra"

  // Second sample: recognised with English only.
  Dim englishImage As FolderItem = MainWindow.FindFile("phototest.tif")
  Dim englishWindow As New MainWindow
  englishWindow.Init englishImage, "eng"
End EventHandler
End Class
Class MainWindow Inherits Window
// Window that runs Tesseract OCR on one image, shows the recognised text in
// EditField1 and paints the image (optionally with word boxes) into the Out canvas.
Control EditField1 Inherits TextArea
ControlInstance EditField1 Inherits TextArea
End Control
Control CheckBox1 Inherits CheckBox
ControlInstance CheckBox1 Inherits CheckBox
EventHandler Sub Action()
  // CheckBox1 switches the word boxes between black and confidence colouring
  // (see the Paint event of Out), so redraw when it changes.
  Refresh
End EventHandler
End Control
Control CheckBox2 Inherits CheckBox
ControlInstance CheckBox2 Inherits CheckBox
EventHandler Sub Action()
  // CheckBox2 turns the word boxes on and off (see the Paint event of Out),
  // so redraw when it changes.
  Refresh
End EventHandler
End Control
Control Out Inherits Canvas
ControlInstance Out Inherits Canvas
EventHandler Sub Paint(g As Graphics, areas() As REALbasic.Rect)
  // Draws the loaded picture at its natural size and, when CheckBox2 is
  // checked, a rectangle around every word reported by the OCR result iterator.

  If pic <> Nil Then
    g.DrawPicture pic, 0, 0, pic.Width, pic.Height, 0, 0, pic.Width, pic.Height
  End If

  // Initial pen colour; it is replaced for every word inside the loop below.
  g.ForeColor = &cFF0000

  If OCR <> Nil Then
    // check result
    Dim r As TessResultIteratorMBS = OCR.ResultIterator

    If r <> Nil And CheckBox2.Value Then
      Do
        // Plain black boxes unless confidence colouring is requested.
        g.ForeColor = &c000000

        If CheckBox1.Value Then
          // query values like confidence
          // The scaling below assumes a 0..100 confidence value:
          // low values give more red, high values more green.
          Dim Confidence As Double = r.Confidence(r.kLevelWord)
          g.ForeColor = Rgb((100 - Confidence) * 2.55, Confidence * 2.55, 0)
        End If

        Dim rleft, rtop, rright, rbottom As Integer

        // query bounding box
        If r.BoundingBox(r.kLevelWord, rleft, rtop, rright, rbottom) Then
          Dim rwidth As Integer = rright - rleft
          Dim rheight As Integer = rbottom - rtop
          g.DrawRect rleft, rtop, rwidth, rheight
        End If

        // and go to next word
      Loop Until r.MoveNext(r.kLevelWord) = False
    End If
  End If
End EventHandler
End Control
Shared Function FindFile(name as string) As FolderItem
  // Searches for an item called `name`, starting in the folder that contains
  // the executable and walking up through its parent folders.
  // Returns the first existing match, or Nil when no ancestor folder has one.
  Dim parent As FolderItem = app.ExecutableFile.Parent

  While parent <> Nil
    Dim file As FolderItem = parent.Child(name)

    If file <> Nil And file.Exists Then
      Return file
    End If

    parent = parent.Parent
  Wend

  // Nothing found on the way up to the volume root.
  Return Nil
End Function
Sub Init(ImageFile as FolderItem, Languages as string)
  // Loads the Tesseract libraries (once per process), initialises an engine
  // for the given languages, runs OCR on ImageFile and shows the result.
  //
  // ImageFile: picture file to recognise; may be Nil when FindFile found nothing.
  // Languages: Tesseract language codes joined with "+", e.g. "eng+deu".

  // FindFile returns Nil for a missing sample; do not dereference it.
  If ImageFile = Nil Then
    MsgBox "Image file not found."
    Return
  End If

  Title = ImageFile.Name

  If Not TessEngineMBS.LibraryLoaded Then
    // load on first try

    #If TargetLinux Then
      // please install tesseract library, so plugin can load it:
      // sudo apt-get install libtesseract5

      // first load leptonica
      Const path1 = "liblept.so.5"
      Call TessEngineMBS.LoadLibrary(path1)

      // then load tesseract
      Const path2 = "libtesseract.so.5"
      If TessEngineMBS.LoadLibrary(path2) Then
        'MsgBox "OK"
      Else
        MsgBox TessEngineMBS.LibraryLoadErrorMessage
        Quit
        // Stop here so no engine calls are made without a loaded library.
        Return
      End If

    #ElseIf TargetMacOS Then
      // for macOS, please download from our website

      // install via homebrew
      // brew install tesseract-lang

      // first load leptonica
      Const path1 = "/usr/local/Cellar/leptonica/1.82.0/lib/liblept.5.dylib"
      If TessEngineMBS.LoadLibrary(path1) Then
        // then load tesseract
        Const path2 = "/usr/local/Cellar/tesseract/5.1.0/lib/libtesseract.5.dylib"
        If TessEngineMBS.LoadLibrary(path2) Then
          'MessageBox "Version: "+TessEngineMBS.Version
        Else
          MsgBox TessEngineMBS.LibraryLoadErrorMessage
          Quit
          Return
        End If
      Else
        MsgBox TessEngineMBS.LibraryLoadErrorMessage
        Quit
        Return
      End If

    #ElseIf TargetWindows Then
      // get Windows DLL somewhere
      // e.g. https://github.com/UB-Mannheim/tesseract/wiki

      // change directory so DLLs find their dependencies
      Call TessEngineMBS.SetCurrentWorkingDirectory("C:\Program Files\Tesseract-OCR")

      // first load image processing library
      Call TessEngineMBS.LoadLibrary("liblept-5.dll")

      // and load the main DLL
      If TessEngineMBS.LoadLibrary("libtesseract-5.dll") Then
        'MsgBox "OK"
      Else
        MsgBox TessEngineMBS.LibraryLoadErrorMessage
        Quit
        Return
      End If

    #Else
      // Deliberate syntax error: forces a compile failure on unsupported targets.
      ?
    #EndIf
  End If

  // search tessdata folder
  Dim tessdataFolder As FolderItem
  Dim tessdataPath As String

  #If TargetLinux Then
    // just pass empty string for default path

  #ElseIf TargetMacOS Then
    'tessdataFolder = FindFile("tessdata")
    tessdataFolder = GetFolderItem("/usr/local/Cellar/tesseract-lang/4.1.0/share/tessdata", FolderItem.PathTypeNative)

    // GetFolderItem can return a non-Nil item for a folder that does not
    // exist, so test Exists as well as Nil.
    If tessdataFolder = Nil Or Not tessdataFolder.Exists Then
      MsgBox "Please get the tessdata folder!"
      Quit
      Return
    Else
      tessdataPath = tessdataFolder.NativePath
    End If

  #ElseIf TargetWindows Then
    tessdataPath = "C:\Program Files\Tesseract-OCR\tessdata"

  #Else
    // Deliberate syntax error: forces a compile failure on unsupported targets.
    ?
  #EndIf

  OCR = New TessEngineMBS

  If Not OCR.Initialize(tessdataPath, Languages) Then
    MsgBox "failed to initialize"
    Quit
    Return
  End If

  // Not used further; kept so the values can be inspected in the debugger.
  Dim AvailableLanguages() As String = OCR.GetAvailableLanguages
  Dim LoadedLanguages() As String = OCR.GetLoadedLanguages

  Dim p As Picture = ImageFile.OpenAsPicture

  If p <> Nil Then
    // pass image file
    'OCR.SetImageFile(ImageFile)

    // pass picture object
    OCR.SetImage(p)

    // optional set page segmentation mode
    'OCR.PageSegMode = OCR.kPageSegModeAuto

    // you can query a copy of the image it got to inspect
    'Dim debugPicture As Picture = OCR.InputImage

    // optional call Recognize here
    'Call OCR.Recognize

    // get the text
    Dim OCRText As String = OCR.GetText

    // now let's add some more details
    OCRText = ReplaceLineEndings(OCRText, EndOfLine)
    OCRText = OCRText + EndOfLine + "MeanTextConf: "+Str(OCR.MeanTextConf)
    OCRText = OCRText + EndOfLine + "Version: "+Str(OCR.Version)

    EditField1.Text = OCRText

    // Keep the picture so the Paint event of Out can draw it.
    pic = p
  Else
    MsgBox "No picture?"
  End If
End Sub
// OCR engine for this window; created in Init and read by the Paint event.
Property OCR As TessEngineMBS
// Picture that was recognised; set in Init and drawn by the Paint event.
Property pic As Picture
End Class
MenuBar MainMenuBar
// Menu bar of the example with a File menu and an Edit menu.
// Captions written as "#App.<name>" refer to the constants declared in class App.
MenuItem FileMenu = "&File"
MenuItem FileQuit = "#App.kFileQuit"
MenuItem EditMenu = "&Edit"
MenuItem EditUndo = "&Undo"
// A caption of "-" presumably renders as a separator line.
MenuItem EditSeparator1 = "-"
MenuItem EditCut = "Cu&t"
MenuItem EditCopy = "&Copy"
MenuItem EditPaste = "&Paste"
MenuItem EditClear = "#App.kEditClear"
MenuItem EditSeparator2 = "-"
MenuItem EditSelectAll = "Select &All"
End MenuBar
Class MyTessEngineMBS Inherits TessEngineMBS
// TessEngineMBS subclass that only logs the engine's Cancel and Progress events.
EventHandler Function Cancel(words as integer) As boolean
  // Logs the word count passed in by the engine.
  // Returns False, the value the handler produced implicitly before;
  // presumably that tells the engine to keep going - confirm in the plugin docs.
  Dim logLine As String = CurrentMethodName + ": "+str(words)
  System.DebugLog logLine
  Return False
End EventHandler
EventHandler Function Progress(Left as integer, Right as Integer, Top as Integer, Bottom as Integer, Progress as Integer) As boolean
  // Logs the reported rectangle as "<Left>x<Top> <Right>x<Bottom>" followed by
  // the progress value and a percent sign. Returns False, as before.
  Dim topLeft As String = Str(Left)+"x"+Str(Top)
  Dim bottomRight As String = Str(Right)+"x"+Str(Bottom)
  System.DebugLog CurrentMethodName + ": "+topLeft+" "+bottomRight+" "+Str(Progress)+"%"
  Return False
End EventHandler
End Class
End Project

The items on this page are in the following plugins: MBS OCR Plugin.


The biggest plugin in space...