The following document contains the results of RAT (Release Audit Tool).
*****************************************************
Summary
-------
Notes: 4
Binaries: 7
Archives: 1
Standards: 96
Apache Licensed: 88
Generated Documents: 0
JavaDocs are generated and so license header is optional
Generated files do not require license headers
8 Unknown Licenses
*******************************
Archives (+ indicates readable, $ unreadable):
+ src/test/resources/test-documents/test-documents.zip
*****************************************************
Files with AL headers will be marked AL
Binary files (which do not require AL headers) will be marked B
Compressed archives will be marked A
Notices, licenses etc will be marked N
!????? CHANGES.txt
AL HEADER.txt
N KEYS
N LICENSE.txt
N NOTICE.txt
AL pom.xml
N README.txt
AL src/main/assembly/bin.xml
AL src/main/assembly/src.xml
AL src/main/java/org/apache/tika/config/TikaConfig.java
AL src/main/java/org/apache/tika/exception/CauseIOException.java
AL src/main/java/org/apache/tika/exception/TikaException.java
AL src/main/java/org/apache/tika/metadata/CreativeCommons.java
AL src/main/java/org/apache/tika/metadata/DublinCore.java
AL src/main/java/org/apache/tika/metadata/HttpHeaders.java
AL src/main/java/org/apache/tika/metadata/Metadata.java
AL src/main/java/org/apache/tika/metadata/MSOffice.java
AL src/main/java/org/apache/tika/metadata/package.html
AL src/main/java/org/apache/tika/metadata/SpellCheckedMetadata.java
AL src/main/java/org/apache/tika/metadata/TikaMetadataKeys.java
AL src/main/java/org/apache/tika/metadata/TikaMimeKeys.java
AL src/main/java/org/apache/tika/mime/Clause.java
AL src/main/java/org/apache/tika/mime/HexCoDec.java
AL src/main/java/org/apache/tika/mime/Magic.java
AL src/main/java/org/apache/tika/mime/MagicClause.java
AL src/main/java/org/apache/tika/mime/MagicMatch.java
AL src/main/java/org/apache/tika/mime/MimeType.java
AL src/main/java/org/apache/tika/mime/MimeTypeException.java
AL src/main/java/org/apache/tika/mime/MimeTypes.java
AL src/main/java/org/apache/tika/mime/MimeTypesFactory.java
AL src/main/java/org/apache/tika/mime/MimeTypesReader.java
AL src/main/java/org/apache/tika/mime/Operator.java
AL src/main/java/org/apache/tika/mime/Patterns.java
AL src/main/java/org/apache/tika/parser/AutoDetectParser.java
AL src/main/java/org/apache/tika/parser/EmptyParser.java
AL src/main/java/org/apache/tika/parser/ErrorParser.java
AL src/main/java/org/apache/tika/parser/html/HtmlParser.java
AL src/main/java/org/apache/tika/parser/microsoft/ExcelEventParser.java
AL src/main/java/org/apache/tika/parser/microsoft/ExcelParser.java
AL src/main/java/org/apache/tika/parser/microsoft/FilteredStringWriter.java
AL src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
AL src/main/java/org/apache/tika/parser/microsoft/PowerPointExtractor.java
AL src/main/java/org/apache/tika/parser/microsoft/PowerPointParser.java
AL src/main/java/org/apache/tika/parser/microsoft/PPTConstants.java
AL src/main/java/org/apache/tika/parser/microsoft/Slide.java
AL src/main/java/org/apache/tika/parser/microsoft/TextBox.java
AL src/main/java/org/apache/tika/parser/microsoft/Word6CHPBinTable.java
AL src/main/java/org/apache/tika/parser/microsoft/Word6Extractor.java
AL src/main/java/org/apache/tika/parser/microsoft/WordParser.java
AL src/main/java/org/apache/tika/parser/microsoft/WordTextBuffer.java
AL src/main/java/org/apache/tika/parser/microsoft/WordTextPiece.java
AL src/main/java/org/apache/tika/parser/opendocument/OpenOfficeEntityResolver.java
AL src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
AL src/main/java/org/apache/tika/parser/Parser.java
AL src/main/java/org/apache/tika/parser/ParserDecorator.java
AL src/main/java/org/apache/tika/parser/ParserPostProcessor.java
AL src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
AL src/main/java/org/apache/tika/parser/pdf/PDFParser.java
AL src/main/java/org/apache/tika/parser/rtf/RTFParser.java
AL src/main/java/org/apache/tika/parser/txt/TXTParser.java
AL src/main/java/org/apache/tika/parser/xml/XMLParser.java
AL src/main/java/org/apache/tika/sax/AppendableAdaptor.java
AL src/main/java/org/apache/tika/sax/ContentHandlerDecorator.java
AL src/main/java/org/apache/tika/sax/TeeContentHandler.java
AL src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
AL src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
AL src/main/java/org/apache/tika/utils/ParseUtils.java
AL src/main/java/org/apache/tika/utils/RegexUtils.java
AL src/main/java/org/apache/tika/utils/RereadableInputStream.java
AL src/main/java/org/apache/tika/utils/StringUtil.java
AL src/main/java/org/apache/tika/utils/Utils.java
AL src/main/resources/mime/tika-mimetypes.xml
AL src/main/resources/tika-config.xml
AL src/site/apt/index.apt
B src/site/resources/tika.png
B src/site/resources/tika.xcf
AL src/site/site.xml
AL src/test/java/org/apache/tika/exception/CauseIOExceptionTest.java
AL src/test/java/org/apache/tika/metadata/TestMetadata.java
AL src/test/java/org/apache/tika/metadata/TestSpellCheckedMetadata.java
AL src/test/java/org/apache/tika/mime/MimeTypesTest.java
AL src/test/java/org/apache/tika/mime/MimeTypeTest.java
AL src/test/java/org/apache/tika/mime/PatternsTest.java
AL src/test/java/org/apache/tika/mime/TestMimeTypes.java
AL src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
AL src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
AL src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
AL src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
AL src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
AL src/test/java/org/apache/tika/parser/txt/TXTParserTest.java
AL src/test/java/org/apache/tika/sax/AppendableAdaptorTest.java
AL src/test/java/org/apache/tika/TestParsers.java
AL src/test/java/org/apache/tika/TestRereadableInputStream.java
AL src/test/java/org/apache/tika/utils/RegexUtilsTest.java
AL src/test/resources/log4j/log4j.properties
A src/test/resources/test-documents/test-documents.zip
B src/test/resources/test-documents/testEXCEL.xls
!????? src/test/resources/test-documents/testHTML.html
!????? src/test/resources/test-documents/testHTML_utf8.html
B src/test/resources/test-documents/testOpenOffice2.odt
B src/test/resources/test-documents/testPDF.pdf
B src/test/resources/test-documents/testPPT.ppt
!????? src/test/resources/test-documents/testRTF.rtf
!????? src/test/resources/test-documents/testTXT.txt
B src/test/resources/test-documents/testWORD.doc
!????? src/test/resources/test-documents/testXML.xml
!????? tika.log
!????? velocity.log
*****************************************************
Printing headers for files without AL header...
=======================================================================
==CHANGES.txt
=======================================================================
Tika Change Log
Release 0.1-incubating - 12/27/2007
1. TIKA-5 - Port Metadata Framework from Nutch (mattmann)
2. TIKA-11 - Consolidate test classes into a src/test/java directory tree (mattmann)
3. TIKA-15 - Utils.print does not print a Content having no value (jukka)
4. TIKA-19 - org.apache.tika.TestParsers fails (bdelacretaz)
5. TIKA-16 - Issues with data files used for testing by TestParsers (bdelacretaz)
6. TIKA-14 - MimeTypeUtils.getMimeType() returns the default mime type for
.odt (Open Office) file (bdelacretaz)
7. TIKA-12 - Add URL capability to MimeTypesUtils (jukka)
8. TIKA-13 - Fix obsolete package names in config.xml (siren)
9. TIKA-10 - Remove MimeInfoException catch clauses and import from TestParsers (siren)
10. TIKA-8 - Replaced the jmimeinfo dependency with a trivial mime type detector (jukka)
11. TIKA-7 - Added the Lius Lite code. Added missing dependencies to POM (jukka)
12. TIKA-18 - "Office" interface should be renamed "MSOffice" (mattmann)
13. TIKA-23 - Decouple Parser from ParserConfig (jukka)
14. TIKA-6 - Port Nutch (or better) MimeType detection system into Tika (J. Charron & mattmann)
15. TIKA-25 - Removed hardcoded reference to C:\oo.xml in OpenOfficeParser (K. Bennett & jukka)
16. TIKA-17 - Need to support URL's for input resources. (K. Bennett & mattmann)
17. TIKA-22 - Remove @author tags from the java source (mattmann)
18. TIKA-21 - Simplified configuration code (jukka)
19. TIKA-17 - Rename all "Lius" classes to be "Tika" classes (jukka)
20. TIKA-30 - Added utility constructors to TikaConfig (K. Bennett & jukka)
21. TIKA-28 - Rename config.xml to tika-config.xml or similar (mattmann)
22. TIKA-26 - Use Map<String, Content> instead of List<Content> (jukka)
23. TIKA-31 - protected Parser.parse(InputStream stream,
=======================================================================
==src/test/resources/test-documents/testHTML.html
=======================================================================
<html>
<head>
<title>Title : Test Indexation Html</title>
</head>
<body>
<h1>Test Indexation Html</h1>
<p>Indexation du fichier</p>
</body>
</html>
=======================================================================
==src/test/resources/test-documents/testHTML_utf8.html
=======================================================================
<html>
<head>
<title>Title : Tilte with UTF-8 chars ???§??</title>
</head>
<body>
<h1>Content with UTF-8 chars</h1>
<p>???§??</p>
</body>
</html>
=======================================================================
==src/test/resources/test-documents/testRTF.rtf
=======================================================================
{\rtf1\ansi\ansicpg1252\uc1\deff0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deflang1036\deflangfe1036{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f37\froman\fcharset238\fprq2 Times New Roman CE;}
{\f38\froman\fcharset204\fprq2 Times New Roman Cyr;}{\f40\froman\fcharset161\fprq2 Times New Roman Greek;}{\f41\froman\fcharset162\fprq2 Times New Roman Tur;}{\f42\froman\fcharset177\fprq2 Times New Roman (Hebrew);}
{\f43\froman\fcharset178\fprq2 Times New Roman (Arabic);}{\f44\froman\fcharset186\fprq2 Times New Roman Baltic;}{\f45\froman\fcharset163\fprq2 Times New Roman (Vietnamese);}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;
\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;
\red128\green128\blue128;\red192\green192\blue192;}{\stylesheet{\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1036\langfe1036\cgrid\langnp1036\langfenp1036 \snext0 Normal;}{\*\cs10 \additive \ssemihidden
Default Paragraph Font;}{\*\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv
\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1024\langfe1024\cgrid\langnp1024\langfenp1024 \snext11 \ssemihidden Normal Table;}}{\*\latentstyles\lsdstimax156\lsdlockeddef0}{\*\rsidtbl \rsid2954171\rsid10375891}
{\*\generator Microsoft Word 11.0.6568;}{\info{\title Test d\'92indexation Word}{\author Bibliotheque}{\operator Bibliotheque}{\creatim\yr2006\mo5\dy18\hr12\min19}{\revtim\yr2006\mo5\dy18\hr12\min19}{\version2}{\edmins0}{\nofpages1}{\nofwords3}
{\nofchars21}{\*\company Universite Laval}{\nofcharsws23}{\vern24579}}\paperw11906\paperh16838\margl1417\margr1417\margt1417\margb1417
\deftab708\widowctrl\ftnbj\aenddoc\hyphhotz425\noxlattoyen\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\formshade\horzdoc\dgmargin\dghspace180\dgvspace180\dghorigin1417\dgvorigin1417\dghshow1\dgvshow1
\jexpand\viewkind1\viewscale100\pgbrdrhead\pgbrdrfoot\splytwnine\ftnlytwnine\htmautsp\nolnhtadjtbl\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nobrkwrptbl\snaptogridincell\allowfieldendsel\wrppunct\asianbrkrule\nojkernpunct\rsidroot2954171 \fet0
\sectd \linex0\headery708\footery708\colsx708\endnhere\sectlinegrid360\sectdefaultcl\sftnbj {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3
\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}
{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\pard\plain
\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1036\langfe1036\cgrid\langnp1036\langfenp1036 {\insrsid2954171 Test d\rquote indexation Word
\par
\par }}
=======================================================================
==src/test/resources/test-documents/testTXT.txt
=======================================================================
Test d'indexation de Txt
http://www.apache.org
=======================================================================
==src/test/resources/test-documents/testXML.xml
=======================================================================
<?xml version="1.0" encoding="UTF-8"?>
<oaidc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oaidc="http://www.openarchives.org/OAI/2.0/oai_dc/">
<dc:title>Archimède et Lius</dc:title>
<dc:creator>Rida Benjelloun</dc:creator>
<dc:subject>Java</dc:subject>
<dc:subject>XML</dc:subject>
<dc:subject>XSLT</dc:subject>
<dc:subject>JDOM</dc:subject>
<dc:subject>Indexation</dc:subject>
<dc:description>Framework d'indexation des documents XML, HTML, PDF etc.. </dc:description>
<dc:identifier>http://www.apache.org</dc:identifier>
<dc:date>2000-12</dc:date>
<dc:type>test</dc:type>
<dc:format>application/msword</dc:format>
<dc:language>Fr</dc:language>
<dc:rights>Non restreint</dc:rights>
</oaidc:dc>
=======================================================================
==tika.log
=======================================================================
=======================================================================
==velocity.log
=======================================================================
Sun Jan 06 19:01:24 PST 2008 [debug] AvalonLogSystem initialized using logfile 'velocity.log'
Sun Jan 06 19:01:24 PST 2008 [info] **************************************************************
Sun Jan 06 19:01:24 PST 2008 [info] Starting Jakarta Velocity v1.4
Sun Jan 06 19:01:24 PST 2008 [info] RuntimeInstance initializing.
Sun Jan 06 19:01:24 PST 2008 [info] Default Properties File: org/apache/velocity/runtime/defaults/velocity.properties
Sun Jan 06 19:01:24 PST 2008 [info] Trying to use logger class org.apache.velocity.runtime.log.AvalonLogSystem
Sun Jan 06 19:01:24 PST 2008 [info] Using logger class org.apache.velocity.runtime.log.AvalonLogSystem
Sun Jan 06 19:01:24 PST 2008 [info] Default ResourceManager initializing. (class org.apache.velocity.runtime.resource.ResourceManagerImpl)
Sun Jan 06 19:01:24 PST 2008 [info] Resource Loader Instantiated: org.apache.velocity.runtime.resource.loader.FileResourceLoader
Sun Jan 06 19:01:24 PST 2008 [info] FileResourceLoader : initialization starting.
Sun Jan 06 19:01:24 PST 2008 [info] FileResourceLoader : adding path '/Users/mattmann/.maven/cache/maven-xdoc-plugin-1.8/plugin-resources/templates'
Sun Jan 06 19:01:24 PST 2008 [info] FileResourceLoader : initialization complete.
Sun Jan 06 19:01:24 PST 2008 [info] ResourceCache : initialized. (class org.apache.velocity.runtime.resource.ResourceCacheImpl)
Sun Jan 06 19:01:24 PST 2008 [info] Default ResourceManager initialization complete.
Sun Jan 06 19:01:24 PST 2008 [info] Loaded System Directive: org.apache.velocity.runtime.directive.Literal
Sun Jan 06 19:01:24 PST 2008 [info] Loaded System Directive: org.apache.velocity.runtime.directive.Macro
Sun Jan 06 19:01:24 PST 2008 [info] Loaded System Directive: org.apache.velocity.runtime.directive.Parse
Sun Jan 06 19:01:24 PST 2008 [info] Loaded System Directive: org.apache.velocity.runtime.directive.Include
Sun Jan 06 19:01:24 PST 2008 [info] Loaded System Directive: org.apache.velocity.runtime.directive.Foreach
Sun Jan 06 19:01:24 PST 2008 [info] Created: 20 parsers.
Sun Jan 06 19:01:24 PST 2008 [info] Velocimacro : initialization starting.
Sun Jan 06 19:01:24 PST 2008 [info] Velocimacro : adding VMs from VM library template : VM_global_library.vm
Sun Jan 06 19:01:24 PST 2008 [error] ResourceManager : unable to find resource 'VM_global_library.vm' in any resource loader.
Sun Jan 06 19:01:24 PST 2008 [info] Velocimacro : error using VM library template VM_global_library.vm : org.apache.velocity.exception.ResourceNotFoundException: Unable to find resource 'VM_global_library.vm'
Sun Jan 06 19:01:24 PST 2008 [info] Velocimacro : VM library template macro registration complete.
Sun Jan 06 19:01:24 PST 2008 [info] Velocimacro : allowInline = true : VMs can be defined inline in templates
Sun Jan 06 19:01:24 PST 2008 [info] Velocimacro : allowInlineToOverride = false : VMs defined inline may NOT replace previous VM definitions
Sun Jan 06 19:01:24 PST 2008 [info] Velocimacro : allowInlineLocal = false : VMs defined inline will be global in scope if allowed.
Sun Jan 06 19:01:24 PST 2008 [info] Velocimacro : messages on : VM system will output logging messages
Sun Jan 06 19:01:24 PST 2008 [info] Velocimacro : autoload off : VM system will not automatically reload global library macros
Sun Jan 06 19:01:24 PST 2008 [info] Velocimacro : initialization complete.
Sun Jan 06 19:01:24 PST 2008 [info] Velocity successfully started.
Sun Jan 06 19:01:24 PST 2008 [info] ResourceManager : found cvs-usage.xml with loader org.apache.velocity.runtime.resource.loader.FileResourceLoader
Sun Jan 06 19:01:24 PST 2008 [error] RHS of #set statement is null. Context will not be modified. cvs-usage.xml [line 28, column 5]
Sun Jan 06 19:01:24 PST 2008 [info] ResourceManager : found index.xml with loader org.apache.velocity.runtime.resource.loader.FileResourceLoader
Sun Jan 06 19:01:24 PST 2008 [info] ResourceManager : found maven-reports.xml with loader org.apache.velocity.runtime.resource.loader.FileResourceLoader
Sun Jan 06 19:01:24 PST 2008 [info] ResourceManager : found dependencies.xml with loader org.apache.velocity.runtime.resource.loader.FileResourceLoader
Sun Jan 06 19:01:24 PST 2008 [info] ResourceManager : found issue-tracking.xml with loader org.apache.velocity.runtime.resource.loader.FileResourceLoader
Sun Jan 06 19:01:24 PST 2008 [error] Method getText threw exception for reference $escape in template issue-tracking.xml at [29,22]