APISonar


org.apache.tika.sax.BasicContentHandlerFactory.HANDLER_TYPE.XML

> org > apache > tika > sax > BasicContentHandlerFactory > HANDLER_TYPE > XML
org APIs apache APIs tika APIs sax APIs BasicContentHandlerFactory APIs HANDLER_TYPE APIs XML APIs

Example 1
public void testBasicXML() throws Exception {
        // Collect metadata using a content handler factory configured for the XML handler type.
        Metadata seed = new Metadata();
        BasicContentHandlerFactory xmlFactory =
                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1);
        List<Metadata> metadataList = getMetadata(seed, xmlFactory);

        // Only the first entry of the returned list is inspected.
        Metadata first = metadataList.get(0);
        String xml = first.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT);

        // Not much differentiates html from xml in this test file, so the check
        // is limited to the presence of the self-closed header paragraph.
        assertTrue(xml.contains("<p class=\"header\" />"));
    }
Example 2
/**
 * Parses the stream through a {@code RecursiveParserWrapper} built around the
 * given parser and returns the metadata list gathered by the handler.
 * Content is collected with a {@code BasicContentHandlerFactory} using the
 * XML handler type.
 *
 * @param is                stream handed to the wrapper's parse call
 * @param p                 parser wrapped by the {@code RecursiveParserWrapper}
 * @param context           parse context forwarded to the parse call
 * @param metadata          metadata object forwarded to the parse call
 * @param suppressException when {@code true}, an exception thrown while parsing
 *                          is dropped and whatever the handler holds is still returned;
 *                          when {@code false}, the exception is rethrown
 * @return the handler's metadata list
 * @throws Exception any exception raised by parsing, unless suppressed
 */
protected List<Metadata> getRecursiveMetadata(InputStream is, Parser p, ParseContext context, Metadata metadata,
                                                  boolean suppressException) throws Exception {
        RecursiveParserWrapper recursiveParser = new RecursiveParserWrapper(p);
        BasicContentHandlerFactory xmlFactory =
                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1);
        RecursiveParserWrapperHandler collector = new RecursiveParserWrapperHandler(xmlFactory);
        try {
            recursiveParser.parse(is, collector, metadata, context);
        } catch (Exception parseFailure) {
            if (suppressException) {
                // Caller asked for best-effort results: fall through and
                // return what the handler collected before the failure.
            } else {
                throw parseFailure;
            }
        }
        return collector.getMetadataList();
    }
Example 3
public void testOCROutputsHOCR() throws Exception {
        // Precondition for the test: canRun() must report true.
        assumeTrue("can run OCR", canRun());

        // An empty array is passed for the "non-OCR contains" argument.
        String[] noNonOcrExpectations = {};

        // Run OCR on the sample PDF with the XML handler type and HOCR output type.
        String hocrOutput = runOCR(
                "testOCR.pdf",
                noNonOcrExpectations,
                2,
                BasicContentHandlerFactory.HANDLER_TYPE.XML,
                TesseractOCRConfig.OUTPUT_TYPE.HOCR);

        // The result must carry the hOCR word span markup and the recognised word.
        assertContains("<span class=\"ocrx_word\" id=\"word_1_1\"", hocrOutput);
        assertContains("Happy</span>", hocrOutput);

    }
Example 4
/**
 * Builds a {@code BasicContentHandlerFactory} whose handler type corresponds
 * to the requested output type.
 *
 * <p>Mapping: HTML -> HTML, XML -> XML, TEXT -> TEXT, TEXT_MAIN -> BODY.
 * METADATA and any other output type map to IGNORE.
 *
 * @param type requested output type; compared with {@code equals}, so it must not be null
 * @return a new factory created with the selected handler type and {@code -1}
 *         as its second constructor argument
 */
private ContentHandlerFactory getContentHandlerFactory(OutputType type) {
        // The first matching output type wins; IGNORE is the fallback.
        BasicContentHandlerFactory.HANDLER_TYPE selected =
                type.equals(HTML) ? BasicContentHandlerFactory.HANDLER_TYPE.HTML
                : type.equals(XML) ? BasicContentHandlerFactory.HANDLER_TYPE.XML
                : type.equals(TEXT) ? BasicContentHandlerFactory.HANDLER_TYPE.TEXT
                : type.equals(TEXT_MAIN) ? BasicContentHandlerFactory.HANDLER_TYPE.BODY
                : BasicContentHandlerFactory.HANDLER_TYPE.IGNORE;
        return new BasicContentHandlerFactory(selected, -1);
    }