!752 lib 1004690 // tika import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.AutoDetectParser; import org.apache.tika.parser.ParseContext; import org.apache.tika.sax.BodyContentHandler; static Map processFile(File file) ctex { new HashMap map; InputStream input = new FileInputStream(file); try { BodyContentHandler handler = new BodyContentHandler(-1); Metadata metadata = new Metadata(); AutoDetectParser parser = new AutoDetectParser(); ParseContext parseContext = new ParseContext(); parser.parse(input, handler, metadata, parseContext); map.put("text", handler.toString()); map.put("title", metadata.get(TikaCoreProperties.TITLE)); map.put("pageCount", metadata.get("xmpTPg:NPages")); } finally { input.close(); } return map; } p { Map extractedMap = processFile(new File(or(get(args, 0), "/home/stefan/Desktop/maude-primer.pdf"))); S text = (S) extractedMap.get("text"); print(text); print(l(text)); }