Java jSoup: parse complicated xml tags

Last time I needed to parse complicated XML tags, I came up with the solution described in “Android: RSS reader with complicated xml”.
This time I used the jsoup library, and my solution became much simpler and more accurate.

/**
 * Downloads the RSS feed at the given URL and appends one {@code ToldotItem}
 * per {@code <item>} element to {@code list}.
 *
 * <p>If the feed cannot be fetched, the failure is logged and the method
 * returns without touching {@code list}.
 *
 * @param keyword URL of the RSS feed; despite its name it is passed to
 *                {@code Jsoup.connect} unchanged
 * @param list    destination list; parsed items are appended in the order the
 *                {@code <item>} elements are selected
 */
public void getData(String keyword, ArrayList<ToldotItem> list) {
    String urlToRssFeed = keyword;
    Log.d(Helpers.TAG, urlToRssFeed);

    org.jsoup.nodes.Document doc;
    try {
        doc = Jsoup.connect(urlToRssFeed).get();
    } catch (IOException e) {
        // Log with the throwable attached instead of printStackTrace(), so the
        // failure shows up in the app log under the usual tag.
        Log.e(Helpers.TAG, "Failed to load RSS feed: " + urlToRssFeed, e);
        return;
    }

    if (doc == null) {
        return;
    }

    for (org.jsoup.nodes.Element link : doc.select("item")) {
        // Debug trace of the raw item text; this is not an error condition.
        Log.d(Helpers.TAG, link.text());

        // Re-parse the item's own markup as a standalone document so that every
        // selector below only sees the content of this one item.
        org.jsoup.nodes.Document docInner = Jsoup.parse(link.outerHtml());

        ToldotItem item = new ToldotItem();
        item.title = docInner.select("title").text();
        item.pubDate = docInner.select("pubDate").text();
        item.link = docInner.select("link").text();
        item.description = docInner.select("description").text();
        item.guid = docInner.select("guid").text();
        item.author = docInner.select("author").text();

        // Namespaced tags such as <media:thumbnail> are addressed with the
        // "namespace|tag" selector form.
        item.thumbnail = docInner.select("media|thumbnail ").attr("url");

        // Video and audio fields are only assigned when a matching
        // <media:content> element exists; otherwise they keep whatever value
        // a new ToldotItem starts with.
        Elements video = docInner.select("media|content[medium=video]");
        if (video.size() > 0) {
            Log.d(Helpers.TAG, "links: " + video.size());
            item.video = video.attr("url");
            item.videoFileSize = video.attr("fileSize");
            item.videoDuration = video.attr("duration");
        }

        Elements audio = docInner.select("media|content[medium=audio]");
        if (audio.size() > 0) {
            item.audio = audio.attr("url");
            item.audioFileSize = audio.attr("fileSize");
            item.audioDuration = audio.attr("duration");
        }

        list.add(item);
    }
}

As you can see, namespaced tags like “<media:content>” are matched with the CSS selector “media|content” (the “namespace|tag” form), not with an XPath expression.

1 comment

Leave a Reply

%d bloggers like this: