View Javadoc

1   /*
2    * Created on Feb 5, 2005
3    *
4    * TODO To change the template for this generated file go to
5    * Window - Preferences - Java - Code Style - Code Templates
6    */
7   package net.sf.gumshoe.indexer;
8   
9   import java.io.File;
10  import java.io.FileNotFoundException;
11  import java.io.IOException;
12  import java.io.Reader;
13  import java.util.List;
14  
15  import org.apache.lucene.document.DateField;
16  import org.apache.lucene.document.Document;
17  import org.apache.lucene.document.Field;
18  import org.ccil.cowan.tagsoup.Parser;
19  import org.xml.sax.InputSource;
20  import org.xml.sax.SAXException;
21  import org.xml.sax.XMLReader;
22  
23  /***
24   * @author Gabor
25   *
26   * TODO To change the template for this generated type comment go to
27   * Window - Preferences - Java - Code Style - Code Templates
28   */
29  public abstract class ContentReader {
30  	public static final String MODIFIED = "modified";
31  	public static final String FILENAME = "filename";
32  	public static final String FILECATEGORY = "filecategory";	
33  	public static final String CONTENTS = "contents";
34  
35  	/*** Generate index entry for file
36  	 * @param f file to be indexed
37  	 * @return index entry
38  	 * @throws Exception
39  	 */
40  	public abstract Document getDocument(File f) throws Exception;
41  	
42  	/*** List extensions supported by this reader
43  	 * @return list of extensions
44  	 */
45  	public abstract List getSupportedExtensions();
46  	
47  	/*** Category for this reader. Currently all readers return empty string.
48  	 * Later this will allow developing a "type" based interface with
49  	 * special search fields for various categories, e.g. an email would
50  	 * have a from, to, cc, subject, etc. fields. 
51  	 * 
52  	 * @return category (like email, document, etc.)
53  	 */
54  	public abstract String getCategory();
55  
56  	/*** Convenience method, adds default fields to index entry
57  	 * @param f file to be indexed
58  	 * @param doc index entry
59  	 * @throws IOException
60  	 */
61  	protected void addDefaultFields(File f, Document doc, String category) throws IOException {
62  		doc.add(Field.Keyword(FILENAME, f.getCanonicalPath()));
63  		doc.add(Field.Keyword(FILECATEGORY, category));
64  		doc.add(Field.Keyword(MODIFIED, DateField.timeToString(f.lastModified())));
65  	}
66  	/*** Convenience method to process XML content
67  	 * @param input an XML reader
68  	 * @return a Reader containing all content for this XML input
69  	 * @throws IOException
70  	 * @throws SAXException
71  	 * @throws FileNotFoundException
72  	 */
73  	protected Reader getContentFromXML(Reader input) throws IOException, SAXException, FileNotFoundException {
74  		XMLReader r=new Parser();
75  		XMLSAXHandler ch=new XMLSAXHandler();
76  		r.setContentHandler(ch);
77  		r.parse(new InputSource(input));
78  		return ch.getContent();
79  	}
80  }