Creating a sitemap for an eCommerce website helps search engine crawlers index all of the product pages.





Let's see how to create a sitemap from a product index file.





Create a service config file

Create a scheduler

Create sitemap read and write service interface and implementation

Create models for parsing the index xml file





Below is a sample XML file hosted on a server; we will configure the URL of this hosted XML file in the scheduler.

<aemquickstart

xmlns:xs="http://www.w3.org/2001/XMLSchema" version="2.0">

<channel>

<Item>

<title>

<![CDATA[ AEM Quickstart by Kishore ]]>

</title>

<ProductId>12345</ProductId>

<pubDate>02/28/2017 00:00:00.000000</pubDate>

</Item>

<Item>

<title>

<![CDATA[ Lorel Ipsum ]]>

</title>

<ProductId>56789</ProductId>

<pubDate>02/28/2019 00:00:00.000000</pubDate>

</Item>

<Item>

<title>

<![CDATA[ Create Sitemap in AEM ]]>

</title>

<ProductId>12987</ProductId>

<pubDate>03/28/2019 00:00:00.000000</pubDate>

</Item>

</channel>



</aemquickstart>

Create a service config file

package com.aemquickstart.core.configurations;

import org.osgi.service.metatype.annotations.AttributeDefinition;
import org.osgi.service.metatype.annotations.AttributeType;
import org.osgi.service.metatype.annotations.ObjectClassDefinition;

/**
 * OSGi metatype definition for the product sitemap scheduler.
 *
 * <p>Each member below becomes one editable attribute of the
 * "AEM Quickstart Sitemap Configuration" entry.
 *
 * @author Kishore Polsani
 */
@ObjectClassDefinition(
        name = "AEM Quickstart Sitemap Configuration",
        description = "This configuration helps in creating a product sitemap, reading data from URL")
public @interface SitemapConfiguration {

    /** Name of the scheduler. */
    @AttributeDefinition(
            name = "Scheduler name",
            description = "Name of the scheduler",
            type = AttributeType.STRING)
    String name() default "XML Reader Scheduler";

    /** Switch that turns the scheduler on or off; on by default. */
    @AttributeDefinition(
            name = "Enabled",
            description = "Flag to enable/disable a scheduler",
            type = AttributeType.BOOLEAN)
    boolean enabled() default true;

    /** Cron expression that drives the scheduler. */
    @AttributeDefinition(
            name = "Cron expression",
            description = "Cron expression used by the scheduler",
            type = AttributeType.STRING)
    String cronExpression() default "0 * * * * ?";

    /** Path of an XML file on the system; no default value. */
    @AttributeDefinition(
            name = "XML file path",
            description = "Path of the XML file on the system",
            type = AttributeType.STRING)
    String xmlFilePath();

    /** URL the product index XML is fetched from; no default value. */
    @AttributeDefinition(
            name = "XML product index file URL",
            description = "URL from where XML response is to be read",
            type = AttributeType.STRING)
    String xmlResponseURL();

    /** Repository path under which data is stored. */
    @AttributeDefinition(
            name = "JCR path",
            description = "Path in the JCR to store data",
            type = AttributeType.STRING)
    String jcrPath() default "/content/aemquickstart/en";

    /** Domain (scheme and host) used when building URLs. */
    @AttributeDefinition(
            name = "Enter Domain",
            description = "Enter domain to be used in attribute.",
            type = AttributeType.STRING)
    String domain() default "https://localhost";
}





Create a scheduler

package com. aemquickstart . core . schedulers ; import org.apache.sling.commons.scheduler.ScheduleOptions; import org.apache.sling.commons.scheduler.Scheduler; import org.osgi.service.component.annotations.Activate; import org.osgi.service.component.annotations.Component; import org.osgi.service.component.annotations.Deactivate; import org.osgi.service.component.annotations.Modified; import org.osgi.service.component.annotations.Reference; import org.osgi.service.metatype.annotations.Designate; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.aemquickstart.core.configurations.SitemapConfiguration; import com.aemquickstart.core.models.ProductList; import com.aemquickstart.core.services.SitemapXmlReaderService; import com.aemquickstart.core.services.SitemapXmlWriterService; /** * @author Kishore Polsani * */ @Component(immediate = true , service = Runnable. class ) @Designate(ocd = SitemapConfiguration. class ) public class SitemapScheduler implements Runnable { private final Logger log = LoggerFactory. getLogger ( this . getClass ()); private int schedulerId; // Id of the scheduler based on its name @Reference private Scheduler scheduler; @Reference private SitemapXmlReaderService sitemapXmlReaderService; @Reference private SitemapXmlWriterService sitemapXmlWriterService; private String filePath; // XML file from where sitemap data to be read private String productIndexFileUrl; // URL from where sitemap data to be read private boolean isEnabled; private String jcrPath; private String domain; /** * Activate method to initialize sitemap * * @param sitemapXmlReaderConfiguration */ @Activate protected void activate(SitemapConfiguration sitemapXmlReaderConfiguration) { schedulerId = sitemapXmlReaderConfiguration. name (). hashCode (); filePath = sitemapXmlReaderConfiguration. xmlFilePath (); productIndexFileUrl = sitemapXmlReaderConfiguration. xmlResponseURL (); isEnabled = sitemapXmlReaderConfiguration. 
enabled (); jcrPath = sitemapXmlReaderConfiguration. jcrPath (); domain = sitemapXmlReaderConfiguration. domain (); log. info ( "Scheduler activated: flag={}" , isEnabled); } /** * Modifies the sitemap scheduler id on modification * * @param sitemapXmlReaderConfiguration */ @Modified protected void modified(SitemapConfiguration sitemapXmlReaderConfiguration) { // Removing sitemap scheduler removeScheduler(); // Updating the sitemap scheduler id schedulerId = sitemapXmlReaderConfiguration. name (). hashCode (); // Add the sitemap scheduler addScheduler(sitemapXmlReaderConfiguration); } /** * This method deactivates the scheduler and removes it * * @param sitemapXmlReaderConfiguration */ @Deactivate protected void deactivate(SitemapConfiguration sitemapXmlReaderConfiguration) { // Removing the scheduler removeScheduler(); } /** * This method removes the scheduler */ private void removeScheduler() { log. info ( "Removing scheduler: {}" , schedulerId); // Unscheduling/removing the scheduler scheduler. unschedule (String. valueOf (schedulerId)); } /** * This method adds the scheduler * * @param schedulerConfiguration */ private void addScheduler(SitemapConfiguration xmlReaderConfiguration) { if (isEnabled) { ScheduleOptions scheduleOptions = scheduler. EXPR (xmlReaderConfiguration. cronExpression ()); scheduleOptions. name (xmlReaderConfiguration. name ()); scheduleOptions. canRunConcurrently ( false ); scheduler. schedule ( this , scheduleOptions); log. info ( "Sitemap Scheduler {} is added" , schedulerId); } else { log. info ( "Sitemap Scheduler {} is disabled" , schedulerId); } } /** * Overridden run method to execute Job */ @Override public void run() { log. info ( "In Scheduler run(), isEnabled:{}" , isEnabled); if (isEnabled) { ProductList productList = null ; if (productIndexFileUrl != null && !productIndexFileUrl. isEmpty ()) { log. info ( "Product XML URL: {}" , productIndexFileUrl); productList = sitemapXmlReaderService. 
readXMLFromURL (productIndexFileUrl); sitemapXmlWriterService. createProductSiteMap (domain, productList, jcrPath, "url" ); } } else { log. info ( "Sitemap Scheduler is not enabled" ); } } }









Create an interface to read the XML file

package com.aemquickstart.core.services;

import com.aemquickstart.core.models.ProductList;

/**
 * Service that loads a product index XML document and exposes it as a
 * {@link ProductList}.
 */
public interface SitemapXmlReaderService {

    /**
     * Reads the XML served at the given URL and converts it to a {@link ProductList}.
     *
     * @param responseURL URL of the product index XML
     * @return the parsed product list
     */
    ProductList readXMLFromURL(String responseURL);
}





Create an implementation class to read the XML file

package com. aemquickstart . core . services . impl ; import java.io.BufferedReader; import java.io.InputStreamReader; import java.io.StringReader; import java.net.URL; import java.net.URLConnection; import java.nio.charset.Charset; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; import org.osgi.service.component.annotations.Component; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.aemquickstart.core.models.ProductList; import com.aemquickstart.core.services.SitemapXmlReaderService; @Component(immediate = true , service = SitemapXmlReaderService. class ) public class SitemapXmlReaderServiceImpl implements SitemapXmlReaderService { // Logger private final Logger log = LoggerFactory. getLogger ( this . getClass ()); // JAXB instance private JAXBContext jaxbContext; // JAXB Unmarshaller private Unmarshaller unmarshaller; @Override public ProductList readXMLFromURL(String responseURL) { log. info ( "In readXMLFromURL" ); URLConnection urlConnection = null ; InputStreamReader inputStreamReader = null ; StringBuilder builder = new StringBuilder(); ProductList productList = null ; try { URL url = new URL(responseURL); urlConnection = url. openConnection (); if (urlConnection != null ) { urlConnection. setReadTimeout ( 30 * 1000 ); } if (urlConnection != null && urlConnection. getInputStream () != null ) { inputStreamReader = new InputStreamReader(urlConnection. getInputStream (), Charset. defaultCharset ()); BufferedReader bufferedReader = new BufferedReader(inputStreamReader); if (bufferedReader != null ) { int eof; while ((eof = bufferedReader. read ()) != - 1 ) { builder. append (( char ) eof); } bufferedReader. close (); } } log. info ( "closing input stream" ); inputStreamReader. close (); } catch (Exception e) { log. error (e. getMessage (), e); } String xmlResponse = builder. toString (); log. debug ( "xmlResponse: {}" , xmlResponse); try { jaxbContext = JAXBContext. 
newInstance (ProductList. class ); unmarshaller = jaxbContext. createUnmarshaller (); productList = (ProductList) unmarshaller. unmarshal ( new StringReader(xmlResponse)); log. info ( "ProductList: {}" , productList); } catch (JAXBException e) { log. info (e. getMessage (), e); } return productList; } } Create an model class to parse the XML package com. aemquickstart . core . models ; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlRootElement; @XmlRootElement(name= "aemquickstart" ) @XmlAccessorType(XmlAccessType. FIELD ) public class ProductList { @XmlElement private Channel[] channel = new Channel[ 1 ]; public Channel[] getChannel() { return channel; } public void setChannel(Channel[] channel) { this . channel = channel; } } Create Channel.java package com. aemquickstart . core . models ; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlRootElement; @XmlAccessorType(XmlAccessType. FIELD ) @XmlRootElement(name= "channel" ) public class Channel { @XmlElement private Item[] Item = new Item[ 1 ]; // as the tag name in the xml file.. public Item[] getItem() { return Item; } public void setItem(Item[] item) { Item = item; } } Create Item.java to read all elements package com. aemquickstart . core . models ; import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlRootElement; @XmlRootElement(name= "Item" ) @XmlAccessorType(XmlAccessType. 
FIELD ) public class Item { @XmlElement private String ProductId; @XmlElement private String title; @XmlElement private String pubDate; //@XmlElement public String getProductId() { return ProductId; } public void setProductId(String productId) { ProductId = productId; } public String getTitle() { return title; } public void setTitle(String title) { this . title = title; } public String getPubDate() { return pubDate; } public void setPubDate(String pubDate) { this . pubDate = pubDate; } } Create an interface to write the sitemap to XML file

package com.aemquickstart.core.services;

import com.aemquickstart.core.models.ProductList;

/**
 * Service that turns a {@link ProductList} into a product sitemap.
 */
public interface SitemapXmlWriterService {

    /**
     * Creates the product sitemap for the given products.
     *
     * @param domain      domain used when building product URLs
     * @param productList products to list in the sitemap
     * @param jcrPath     repository path the sitemap relates to
     * @param from        label describing where the call originates
     */
    void createProductSiteMap(String domain, ProductList productList, String jcrPath, String from);
}





Create an implementation class to create the sitemap

package com. aemquickstart . core . services . impl ; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.util.Calendar; import java.util.HashMap; import java.util.Map; import javax.jcr.Binary; import javax.jcr.Node; import javax.jcr.RepositoryException; import javax.jcr.Session; import javax.jcr.ValueFactory; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.Result; import javax.xml.transform.Source; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.apache.commons.lang3.time.FastDateFormat; import org.apache.sling.api.resource.LoginException; import org.apache.sling.api.resource.ResourceResolver; import org.apache.sling.api.resource.ResourceResolverFactory; import org.osgi.service.component.annotations.Component; import org.osgi.service.component.annotations.Reference; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; import com.aemquickstart.core.models.Channel; import com.aemquickstart.core.models.Item; import com.aemquickstart.core.models.ProductList; import com.aemquickstart.core.services.SitemapXmlWriterService; import com.day.cq.wcm.api.Page; import com.day.cq.wcm.api.PageManager; @Component(immediate = true , service = SitemapXmlWriterService. class ) public class SitemapXmlWriterServiceImpl implements SitemapXmlWriterService { // Logger private final Logger log = LoggerFactory. getLogger ( this . getClass ()); // Injecting ResourceResolverFactory @Reference private ResourceResolverFactory resourceResolverFactory; // JCR session private Session session; private Session getSession() { try { // Map for service user details ResourceResolver resourceResolver = getResourceResolver(); // Getting the session by adapting the resourceResolver session = resourceResolver. 
adaptTo (Session. class ); } catch (LoginException e) { log. error (e. getMessage (), e); } return session; } private ResourceResolver getResourceResolver() throws LoginException { Map xmlReaderMap = new HashMap<>(); xmlReaderMap. put (ResourceResolverFactory. SUBSERVICE , "aemquickstartSubservice" ); // Getting ResourceResovler ResourceResolver resourceResolver = resourceResolverFactory. getServiceResourceResolver (xmlReaderMap); return resourceResolver; } @Override public void createProductSiteMap(String domain, ProductList productList, String jcrPath, String from) { log. info ( "createProductSiteMap: {}" , from); ResourceResolver resourceResolver = null ; try { session = getSession(); if (!session. itemExists (jcrPath)) { log. info ( "Provided path does not exist. Sitemap file can't be created under {}" , jcrPath); return ; } else { DocumentBuilderFactory docFactory = DocumentBuilderFactory. newInstance (); DocumentBuilder docBuilder = docFactory. newDocumentBuilder (); Document doc = docBuilder. newDocument (); doc. setXmlStandalone ( true ); Element rootElement = doc. createElement ( "urlset" ); rootElement. setAttribute ( "xmlns" , "http://www.sitemaps.org/schemas/sitemap/0.9" ); doc. appendChild (rootElement); addProductsToXml(domain, rootElement, jcrPath, doc, productList); resourceResolver = getResourceResolver(); PageManager pgMgr = resourceResolver. adaptTo (PageManager. class ); Page homepage = pgMgr. getPage (jcrPath); if ( null != homepage) { String sitemapFile = jcrPath + "/sitemap_products.xml" ; generateXmlFile(jcrPath, doc, session, sitemapFile); } } } catch (Exception e) { log. error (e. getMessage (), e); } finally { resourceResolver. close (); if (session != null ) { session. logout (); } } } private void addProductsToXml(String domain, Element rootElement, String jcrPath, Document doc, ProductList productList) { // Getting the products from ProductList Channel[] channels = productList. 
getChannel (); // Iterate for each item present in the XML file log. info ( "Setting properties" ); Item[] products = channels[ 0 ]. getItem (); for (Item product : products) { String productId = product. getProductId (); Element pdpUrlElement = doc. createElement ( "url" ); Element pdpLoc = doc. createElement ( "loc" ); String title = product. getTitle (); String pdpUrl = domain + jcrPath + "/pdp.html/" + title. replaceAll ( "[^a-zA-Z0-9-]" , "" ) + "/" + productId; pdpLoc. appendChild (doc. createTextNode (pdpUrl)); pdpUrlElement. appendChild (pdpLoc); Calendar lastModified = Calendar. getInstance (); if ( null != lastModified) { Element pdpLstMod = doc. createElement ( "lastmod" ); FastDateFormat DATE_FORMAT = FastDateFormat. getInstance ( "yyyy-MM-dd" ); pdpLstMod. appendChild (doc. createTextNode (DATE_FORMAT. format (lastModified. getTimeInMillis ()))); pdpUrlElement. appendChild (pdpLstMod); } Element pdpChangeFreq = doc. createElement ( "changefreq" ); pdpChangeFreq. appendChild (doc. createTextNode ( "Weekly" )); pdpUrlElement. appendChild (pdpChangeFreq); rootElement. appendChild (pdpUrlElement); // PDP page is added to // root. } } private void generateXmlFile(String jcrPath, Document doc, Session session, String sitemapFile) { try { try { log. info ( "sitemap file: {}" , sitemapFile); if (session. itemExists (sitemapFile)) { log. info ( "Sitemap exists" ); session. removeItem (sitemapFile); session. save (); log. info ( "Old Sitemap is deleted" ); } } catch (Exception e) { log. error ( "Exception while removing xml file: {} " , e); } ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); Source xmlSource = new DOMSource(doc); Result outputTarget = new StreamResult(outputStream); TransformerFactory. newInstance (). newTransformer (). transform (xmlSource, outputTarget); InputStream is = new ByteArrayInputStream(outputStream. toByteArray ()); // create file at file location ValueFactory valueFactory = session. 
getValueFactory (); Binary contentValue; contentValue = valueFactory. createBinary (is); Node homepageNode = session. getNode (jcrPath); Node sitemapNode = homepageNode. addNode ( "sitemap_products.xml" , "nt:file" ); Node resNode = sitemapNode. addNode ( "jcr:content" , "nt:resource" ); resNode. setProperty ( "jcr:data" , contentValue); resNode. setProperty ( "jcr:mimeType" , "text/xml" ); Calendar lastModified = Calendar. getInstance (); lastModified. setTimeInMillis (lastModified. getTimeInMillis ()); resNode. setProperty ( "jcr:lastModified" , lastModified); session. save (); log. info ( "Sitemap is successfull created at {}" , sitemapFile); } catch (RepositoryException rpe) { log. error ( "Exception in Text Renderer: {}" , rpe); } catch (Exception e) { log. error ( "Exception in while writting or creating file Renderer: {}" , e); } } }





Now build the project. Open configMgr and search for "AEM Quickstart Sitemap Configuration"









Enter Scheduler name and select enabled checkbox.

Enter cron expression - to control how frequently the scheduler runs.

Enter XML file URL - the URL from which the product info will be read.

Enter JCR path - where you need to upload your sitemap_products.xml file

Enter Domain - this value will be used while creating the product URL for the <loc> element

Once the scheduler has run, the sitemap_products.xml file will be created under /content/aemquickstart/en























