This error reminds me on another error we had:
http://forum.openkm.com/viewtopic.php?f ... ick#p12559
At that time, an ImageMagick-Bug was the problem.
Now i see "the same". I mean, the program (tesseract or OpenKM's call) crash ("Abnormal program termination") and in consequence there are no temp-files.
Screenshot of admin-tab with "system.ocr" of my windows-machine is attached.
Here's the "repository.xml":
Code: Select all<?xml version="1.0"?>
<!DOCTYPE Repository PUBLIC "-//The Apache Software Foundation//DTD Jackrabbit 1.6//EN"
"http://jackrabbit.apache.org/dtd/repository-1.6.dtd">
<Repository>
<!-- virtual file system where the repository stores global state
(e.g. registered namespaces, custom node types, etc.) -->
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${rep.home}/repository"/>
</FileSystem>
<!-- Security configuration -->
<Security appName="OpenKM">
<!-- Security manager: FQN of class implementing the JackrabbitSecurityManager interface -->
<!--<SecurityManager class="org.apache.jackrabbit.core.DefaultSecurityManager" workspaceName="security">-->
<!-- workspace access: FQN of class implementing the WorkspaceAccessManager interface -->
<!-- <WorkspaceAccessManager class="..."/> -->
<!-- <param name="config" value="${rep.home}/security.xml"/> -->
<!--</SecurityManager>-->
<!-- Access manager: FQN of class implementing the AccessManager interface -->
<AccessManager class="com.openkm.core.OKMAccessManager"/>
<!-- <AccessManager class="org.apache.jackrabbit.core.security.SimpleAccessManager"/> -->
<!-- <AccessManager class="org.apache.jackrabbit.core.security.DefaultAccessManager"> -->
<!-- <param name="config" value="${rep.home}/access.xml"/> -->
<!-- </AccessManager> -->
<!-- <LoginModule class="org.apache.jackrabbit.core.security.simple.SimpleLoginModule"> -->
<!-- <LoginModule class="org.apache.jackrabbit.core.security.authentication.DefaultLoginModule"> -->
<!-- Anonymous user name ('anonymous' is the default value) -->
<!-- <param name="anonymousId" value="anonymous"/> -->
<!-- Administrator user id (default value if param is missing is 'admin') -->
<!-- <param name="adminId" value="admin"/> -->
<!-- <param name="principalProvider" value="..."/> -->
<!--</LoginModule>-->
</Security>
<!-- Location of workspaces root directory and name of default workspace -->
<Workspaces rootPath="${rep.home}/workspaces" defaultWorkspace="default"/>
<!-- Workspace configuration template:
used to create the initial workspace if there's no workspace yet -->
<Workspace name="${wsp.name}">
<!-- Virtual file system of the workspace:
class: FQN of class implementing the FileSystem interface -->
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${wsp.home}"/>
</FileSystem>
<!-- Persistence manager of the workspace:
class: FQN of class implementing the PersistenceManager interface -->
<PersistenceManager class="org.apache.jackrabbit.core.persistence.bundle.DerbyPersistenceManager">
<param name="url" value="jdbc:derby:${wsp.home}/db;create=true"/>
<param name="schemaObjectPrefix" value="${wsp.name}_"/>
</PersistenceManager>
<!-- Search index and the file system it uses.
class: FQN of class implementing the QueryHandler interface -->
<SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
<param name="path" value="${wsp.home}/index"/>
<param name="textFilterClasses" value="
org.apache.jackrabbit.extractor.PlainTextExtractor,
org.apache.jackrabbit.extractor.MsWordTextExtractor,
org.apache.jackrabbit.extractor.MsExcelTextExtractor,
org.apache.jackrabbit.extractor.MsPowerPointTextExtractor,
org.apache.jackrabbit.extractor.OpenOfficeTextExtractor,
org.apache.jackrabbit.extractor.RTFTextExtractor,
org.apache.jackrabbit.extractor.HTMLTextExtractor,
org.apache.jackrabbit.extractor.XMLTextExtractor,
org.apache.jackrabbit.extractor.PngTextExtractor,
org.apache.jackrabbit.extractor.MsOutlookTextExtractor,
com.openkm.extractor.PdfTextExtractor,
com.openkm.extractor.AudioTextExtractor,
com.openkm.extractor.ExifTextExtractor,
com.openkm.extractor.CuneiformTextExtractor,
com.openkm.extractor.SourceCodeTextExtractor,
com.openkm.extractor.MsOffice2007TextExtractor,
com.openkm.extractor.Tesseract2TextExtractor"/>
<param name="extractorPoolSize" value="2"/>
<param name="supportHighlighting" value="false"/>
<param name="indexingConfiguration" value="${wsp.home}/../../../indexing_configuration.xml"/>
</SearchIndex>
</Workspace>
<!-- Configures the versioning -->
<Versioning rootPath="${rep.home}/version">
<!-- Configures the filesystem to use for versioning for the respective
persistence manager -->
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${rep.home}/version" />
</FileSystem>
<!-- Configures the persistence manager to be used for persisting version state.
Please note that the current versioning implementation is based on
a 'normal' persistence manager, but this could change in future
implementations. -->
<PersistenceManager class="org.apache.jackrabbit.core.persistence.bundle.DerbyPersistenceManager">
<param name="url" value="jdbc:derby:${rep.home}/version/db;create=true"/>
<param name="schemaObjectPrefix" value="version_"/>
</PersistenceManager>
</Versioning>
<!-- Search index for content that is shared repository wide
(/jcr:system tree, contains mainly versions) -->
<SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
<param name="path" value="${rep.home}/repository/index"/>
<param name="textFilterClasses" value=""/>
<param name="extractorPoolSize" value="2"/>
<param name="supportHighlighting" value="false"/>
</SearchIndex>
<!-- DataStore improve file handling performance -->
<DataStore class="org.apache.jackrabbit.core.data.FileDataStore">
<param name="path" value="${rep.home}/repository/datastore"/>
<param name="minRecordLength" value="100"/>
</DataStore>
</Repository>
Here's the "workspace.xml":
Code: Select all<?xml version="1.0" encoding="UTF-8"?>
<Workspace name="default">
<!-- Virtual file system of the workspace:
class: FQN of class implementing the FileSystem interface -->
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${wsp.home}"/>
</FileSystem>
<!-- Persistence manager of the workspace:
class: FQN of class implementing the PersistenceManager interface -->
<PersistenceManager class="org.apache.jackrabbit.core.persistence.bundle.DerbyPersistenceManager">
<param name="url" value="jdbc:derby:${wsp.home}/db;create=true"/>
<param name="schemaObjectPrefix" value="${wsp.name}_"/>
</PersistenceManager>
<!-- Search index and the file system it uses.
class: FQN of class implementing the QueryHandler interface -->
<SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
<param name="path" value="${wsp.home}/index"/>
<param name="textFilterClasses" value="
org.apache.jackrabbit.extractor.PlainTextExtractor,
org.apache.jackrabbit.extractor.MsWordTextExtractor,
org.apache.jackrabbit.extractor.MsExcelTextExtractor,
org.apache.jackrabbit.extractor.MsPowerPointTextExtractor,
org.apache.jackrabbit.extractor.OpenOfficeTextExtractor,
org.apache.jackrabbit.extractor.RTFTextExtractor,
org.apache.jackrabbit.extractor.HTMLTextExtractor,
org.apache.jackrabbit.extractor.XMLTextExtractor,
org.apache.jackrabbit.extractor.PngTextExtractor,
org.apache.jackrabbit.extractor.MsOutlookTextExtractor,
com.openkm.extractor.PdfTextExtractor,
com.openkm.extractor.AudioTextExtractor,
com.openkm.extractor.ExifTextExtractor,
com.openkm.extractor.CuneiformTextExtractor,
com.openkm.extractor.SourceCodeTextExtractor,
com.openkm.extractor.MsOffice2007TextExtractor,
com.openkm.extractor.Tesseract2TextExtractor"/>
<param name="extractorPoolSize" value="2"/>
<param name="supportHighlighting" value="false"/>
<param name="indexingConfiguration" value="${wsp.home}/../../../indexing_configuration.xml"/>
</SearchIndex>
</Workspace>
Atm all is configured for tesseract 2.