| OLD | NEW |
| (Empty) | |
| 1 # Sitemap.xml Generator is a Jekyll plugin that generates a sitemap.xml file by |
| 2 # traversing all of the available posts and pages. |
| 3 # pke: modified to use site.config['sitemap']['url'] instead of MY_URL |
| 4 # |
| 5 # How To Use: |
| 6 # 1.) Copy source file into your _plugins folder within your Jekyll project. |
| 7 # 2.) Set url or sitemap: url to reflect your domain name. |
| 8 # 3.) Set sitemap: filename if you want your sitemap to be called something |
| 9 # other than sitemap.xml. |
| 10 # 4.) Change the PAGES_INCLUDE_POSTS list to include any pages that are looping |
| 11 # through your posts (e.g. "index.html", "archive.html", etc.). This will |
| 12 # ensure that right after you make a new post, the last modified date will |
| 13 # be updated to reflect the new post. |
| 14 # 5.) Run Jekyll: jekyll --server to re-generate your site. |
| 15 # 6.) A sitemap.xml should be included in your _site folder. |
| 16 # |
| 17 # Customizations: |
| 18 # 1.) If there are any files you don't want included in the sitemap, add them |
| 19 # to the EXCLUDED_FILES list. The name should match the name of the source |
| 20 # file. |
| 21 # 2.) If you want to include the optional changefreq and priority attributes, |
| 22 # simply include custom variables in the YAML Front Matter of that file. |
| 23 # The names of these custom variables are defined below in the |
| 24 # CHANGE_FREQUENCY_CUSTOM_VARIABLE_NAME and PRIORITY_CUSTOM_VARIABLE_NAME |
| 25 # constants. |
| 26 # |
| 27 # Notes: |
| 28 # 1.) The last modified date is determined by the latest from the following: |
| 29 # system modified date of the page or post, system modified date of |
| 30 # included layout, system modified date of included layout within that |
| 31 # layout, ... |
| 32 # |
| 33 # Author: Michael Levin |
| 34 # Site: http://www.kinnetica.com |
| 35 # Distributed Under A Creative Commons License |
| 36 # - http://creativecommons.org/licenses/by/3.0/ |
| 37 |
| 38 require 'rexml/document' |
| 39 |
| 40 module Jekyll |
| 41 |
# Fallback base URL. It is only used when neither sitemap: url nor url is
# present in the site configuration (see SitemapGenerator#fill_location).
MY_URL = "http://www.dartlang.org"

# Default name of the generated file, used when sitemap: filename is not
# set in the site configuration.
SITEMAP_FILE_NAME = "sitemap.xml"

# Any files to exclude from being included in the sitemap.xml.
# Frozen so the list cannot be mutated by accident at runtime; edit the
# literal here to change it.
EXCLUDED_FILES = ["atom.xml"].freeze

# Any files that include posts, so that when a new post is added, the last
# modified date of these pages should take that into account.
# Frozen for the same reason as EXCLUDED_FILES.
#PAGES_INCLUDE_POSTS = ["index.html"]
PAGES_INCLUDE_POSTS = [].freeze

# Custom variable names for changefreq and priority elements.
# These names are used within the YAML Front Matter of pages or posts
# for which you want to include these properties.
CHANGE_FREQUENCY_CUSTOM_VARIABLE_NAME = "change_frequency"
PRIORITY_CUSTOM_VARIABLE_NAME = "priority"
| 62 |
# Sitemap helpers for posts.
# NOTE(review): @base and url are not defined in this file; presumably this
# reopens Jekyll's own Post class, which provides them - confirm.
class Post
  attr_accessor :name

  # Location of the post's source file.
  #
  # Returns the String obtained by joining @base and @name
  def full_path_to_source
    source_dir = @base
    file_name = @name
    File.join(source_dir, file_name)
  end

  # Public address of the post.
  #
  # my_url - base URL that is prepended to the post's url
  #
  # Returns my_url immediately followed by url
  def location_on_server(my_url)
    relative_url = url
    "#{my_url}#{relative_url}"
  end
end
| 74 |
# Sitemap helpers for pages.
# NOTE(review): @base, @dir and url are not defined in this file; presumably
# this reopens Jekyll's own Page class, which provides them - confirm.
class Page
  attr_accessor :name

  # Location of the page's source file.
  #
  # Returns the String obtained by joining @base, @dir and @name
  def full_path_to_source
    File.join(@base, @dir, @name)
  end

  # Public address of the page.
  #
  # my_url - base URL that is prepended to the page's directory and url
  #
  # Returns my_url + @dir + url, with a trailing "index.html" path segment
  # removed so directory indexes are listed by their directory URL
  def location_on_server(my_url)
    location = "#{my_url}#{@dir}#{url}"
    # Strip only a final segment that is exactly "index.html": the dot is
    # escaped, \z anchors at the end of the string (not of a line), and the
    # (\A|/) boundary keeps names such as "reindex.html" intact.
    location.sub(%r{(\A|/)index\.html\z}, '\1')
  end
end
| 87 |
# Sitemap helper for layouts.
# NOTE(review): @base and @name are not set in this file; presumably this
# reopens Jekyll's own Layout class, which provides them - confirm.
class Layout
  # Location of the layout's source file.
  #
  # Returns the String obtained by joining @base and @name
  def full_path_to_source
    layout_dir = @base
    file_name = @name
    File.join(layout_dir, file_name)
  end
end
| 93 |
# Static file wrapper for the generated sitemap.
# Recovers from a strange exception when starting the server without --auto.
class SitemapFile < StaticFile
  # Delegates to the parent implementation and deliberately ignores any
  # StandardError it raises (best-effort write).
  #
  # dest - destination argument, passed through to super unchanged
  #
  # Returns true, whether or not the parent write succeeded
  def write(dest)
    super(dest)
    true
  rescue
    true
  end
end
| 105 |
| 106 class SitemapGenerator < Generator |
| 107 |
# Valid values allowed by sitemap.xml spec for change frequencies.
# Frozen so the list cannot be modified at runtime.
VALID_CHANGE_FREQUENCY_VALUES = %w[always hourly daily weekly
                                   monthly yearly never].freeze
| 111 |
# Goes through pages and posts and generates the sitemap file in site.dest
#
# site - the site being built; this method reads site.config, site.dest and
#        appends to site.static_files (posts and pages are read by the
#        fill_posts / fill_pages helpers)
#
# Returns nothing of interest (the value of the final static_files append)
def generate(site)
  # fill_last_modified treats a nil @last_modified_post_date as "posts are
  # being processed", so clear any value left over from an earlier run in
  # case this generator instance is reused.
  @last_modified_post_date = nil

  sitemap = REXML::Document.new
  sitemap << REXML::XMLDecl.new("1.0", "UTF-8")

  urlset = REXML::Element.new "urlset"
  urlset.add_attribute("xmlns",
    "http://www.sitemaps.org/schemas/sitemap/0.9")

  @last_modified_post_date = fill_posts(site, urlset)
  fill_pages(site, urlset)

  sitemap.add_element(urlset)

  # Create destination directory if it doesn't exist yet. Otherwise, we
  # cannot write our file there.
  FileUtils.mkdir_p(site.dest) unless File.directory?(site.dest)

  # A configured sitemap: filename wins over the default name
  filename = site.config['sitemap']['filename'] if site.config['sitemap']
  filename ||= SITEMAP_FILE_NAME

  # File I/O: write out pretty-printed XML. The block form closes the file
  # even when the formatter raises part-way through.
  formatter = REXML::Formatters::Pretty.new(4)
  formatter.compact = true
  File.open(File.join(site.dest, filename), "w") do |file|
    formatter.write(sitemap, file)
  end

  # Keep the sitemap.xml file from being cleaned by Jekyll
  site.static_files << Jekyll::SitemapFile.new(site, site.dest, "/", filename)
end
| 142 |
# Adds a url element for every non-excluded post and tracks the newest
# source-file modification time across all posts (excluded ones included)
#
# site   - object whose posts are iterated
# urlset - REXML element that receives the url children
#
# Returns the latest File.mtime among the posts, or nil when there are none
def fill_posts(site, urlset)
  newest = nil

  site.posts.each do |post|
    urlset.add_element(fill_url(site, post)) unless excluded?(post.name)

    modified_at = File.mtime(post.full_path_to_source)
    newest = modified_at if newest.nil? || modified_at > newest
  end

  newest
end
| 161 |
# Create url elements for all the normal pages that are not excluded and
# whose source file exists on disk
#
# site   - object whose pages are iterated
# urlset - REXML element that receives the url children
#
# Returns nothing of interest (the result of iterating site.pages)
def fill_pages(site, urlset)
  site.pages.each do |page|
    next if excluded?(page.name)
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2
    next unless File.exist?(page.full_path_to_source)

    urlset.add_element(fill_url(site, page))
  end
end
| 177 |
# Fill data of each URL element: location, last modified,
# change frequency (optional), and priority (optional).
#
# site         - passed through to fill_location and fill_last_modified
# page_or_post - object responding to data (front matter hash) and name
#
# Invalid change frequency or priority values are reported on stdout and
# the corresponding element is left out.
#
# Returns url REXML::Element
def fill_url(site, page_or_post)
  url = REXML::Element.new "url"

  url.add_element(fill_location(site, page_or_post))

  lastmod = fill_last_modified(site, page_or_post)
  url.add_element(lastmod) if lastmod

  raw_frequency = page_or_post.data[CHANGE_FREQUENCY_CUSTOM_VARIABLE_NAME]
  if raw_frequency
    # to_s first: front matter may yield a non-String (number, symbol, ...)
    # and that should reach the error message below, not raise NoMethodError
    change_frequency = raw_frequency.to_s.downcase

    if valid_change_frequency?(change_frequency)
      changefreq = REXML::Element.new "changefreq"
      changefreq.text = change_frequency
      url.add_element(changefreq)
    else
      puts "ERROR: Invalid Change Frequency In #{page_or_post.name}"
    end
  end

  priority_value = page_or_post.data[PRIORITY_CUSTOM_VARIABLE_NAME]
  if priority_value
    if valid_priority?(priority_value)
      priority = REXML::Element.new "priority"
      priority.text = priority_value
      url.add_element(priority)
    else
      puts "ERROR: Invalid Priority In #{page_or_post.name}"
    end
  end

  url
end
| 217 |
# Build the loc element for a page or post
#
# The base URL is the first one available of: sitemap: url from the site
# configuration, url from the site configuration, the MY_URL constant.
#
# Returns a loc REXML::Element whose text is the page or post's
# location_on_server for that base URL
def fill_location(site, page_or_post)
  sitemap_config = site.config['sitemap']
  base_url = sitemap_config['url'] if sitemap_config
  base_url ||= site.config['url'] || MY_URL

  loc = REXML::Element.new "loc"
  loc.text = page_or_post.location_on_server(base_url)
  loc
end
| 229 |
# Build the lastmod element for a page or post.
#
# The date starts as the newest modification time of the source file and
# its layouts (see find_latest_date). While @last_modified_post_date is
# still nil the item is treated as a post and that date is used as is; once
# it is set, an item whose name is listed as including posts additionally
# takes the latest post date into account.
#
# Returns lastmod REXML::Element with an ISO 8601 timestamp as text
def fill_last_modified(site, page_or_post)
  source_mtime = File.mtime(page_or_post.full_path_to_source)
  effective_date = find_latest_date(source_mtime, site, page_or_post)

  if !@last_modified_post_date.nil? && posts_included?(page_or_post.name)
    effective_date = greater_date(effective_date, @last_modified_post_date)
  end

  lastmod = REXML::Element.new "lastmod"
  lastmod.text = effective_date.iso8601
  lastmod
end
| 255 |
# Walk the layout chain of a page/post (its layout, that layout's layout,
# and so on) and pick the newest modification time seen
#
# latest_date  - starting date to compare against
# site         - object whose layouts are looked up by name
# page_or_post - object whose data["layout"] names the first layout
#
# Returns the later of latest_date and the newest layout file mtime
def find_latest_date(latest_date, site, page_or_post)
  known_layouts = site.layouts
  newest = latest_date
  current = known_layouts[page_or_post.data["layout"]]

  loop do
    break unless current

    stamp = File.mtime(current.full_path_to_source)
    newest = stamp if stamp > newest

    # Move on to the parent layout, if any
    current = known_layouts[current.data["layout"]]
  end

  newest
end
| 274 |
# Pick the later of two dates
#
# Returns date1 when it is later than or equal to date2, otherwise date2
def greater_date(date1, date2)
  date1 >= date2 ? date1 : date2
end
| 285 |
# Should the file with this name be left out of the sitemap?
#
# name - source file name to look up in EXCLUDED_FILES
#
# Returns boolean
def excluded?(name)
  EXCLUDED_FILES.include?(name)
end
| 292 |
# Is the file with this name listed in PAGES_INCLUDE_POSTS, i.e. declared
# as a page that includes posts?
#
# Returns boolean
def posts_included?(name)
  PAGES_INCLUDE_POSTS.include?(name)
end
| 296 |
# Does the given change frequency appear in the list of values the
# sitemap.xml spec allows (VALID_CHANGE_FREQUENCY_VALUES)?
#
# Returns boolean
def valid_change_frequency?(change_frequency)
  VALID_CHANGE_FREQUENCY_VALUES.include?(change_frequency)
end
| 303 |
# Is the priority value provided valid according to the spec, i.e. does it
# convert to a number between 0.0 and 1.0 inclusive?
#
# priority - front matter value; anything Float() cannot convert counts as
#            invalid
#
# Returns boolean
def valid_priority?(priority)
  priority_val = Float(priority)
  priority_val >= 0.0 && priority_val <= 1.0
rescue ArgumentError, TypeError
  # ArgumentError: unparsable String; TypeError: values such as true or an
  # Array, which YAML front matter can easily produce
  false
end
| 316 end |
| 317 end |
| OLD | NEW |