Commit ea07db72 authored by fjsanpedro

Add sitemap generator for .com and gitlab-org group

This commit adds a small feature that generates the sitemap
for the `gitlab-org` group, only in the .com environment.

The idea behind this is to analyze whether a broader sitemap
would improve the search crawling results.
parent 0dffbcb4
......@@ -62,6 +62,8 @@ eslint-report.html
/public/assets/
/public/uploads.*
/public/uploads/
/public/sitemap.xml
/public/sitemap.xml.gz
/shared/artifacts/
/spec/examples.txt
/rails_best_practices_output.html
......
......@@ -12,6 +12,7 @@
# only_owned: boolean
# only_shared: boolean
# limit: integer
# include_subgroups: boolean
# params:
# sort: string
# visibility_level: int
......
......@@ -378,7 +378,7 @@ class Project < ApplicationRecord
delegate :feature_available?, :builds_enabled?, :wiki_enabled?,
:merge_requests_enabled?, :forking_enabled?, :issues_enabled?,
:pages_enabled?, :public_pages?, :private_pages?,
:pages_enabled?, :snippets_enabled?, :public_pages?, :private_pages?,
:merge_requests_access_level, :forking_access_level, :issues_access_level,
:wiki_access_level, :snippets_access_level, :builds_access_level,
:repository_access_level, :pages_access_level, :metrics_dashboard_access_level,
......
---
title: Add default sitemap generator for gitlab-org group
merge_request: 45645
author:
type: added
# frozen_string_literal: true
# Sitemap template. It is evaluated in the context of the object that renders
# it, which must provide `xml_builder`, `urls` and `lastmod`.
xml_builder.instruct!

# The urlset call stays the last expression: its value is what the evaluating
# code receives as the result of the template.
xml_builder.urlset(xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9') do
  # `urls` may hold nested arrays and nils; both are removed before emitting.
  urls.flatten.compact.each do |location|
    xml_builder.url do
      xml_builder.loc(location)
      xml_builder.lastmod(lastmod)
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module Sitemaps
    # Generates the sitemap file for GitLab.com. The sitemap lists a few
    # generic "explore" pages plus the pages of the `gitlab-org` group, its
    # subgroups and its projects.
    class Generator
      class << self
        include Gitlab::Routing

        # Path of the only top-level group whose pages are added to the sitemap.
        GITLAB_ORG_NAMESPACE = 'gitlab-org'.freeze

        # Builds the sitemap and writes it to disk.
        #
        # @return [String, Object] an explanatory message when not running on
        #   GitLab.com or when the group cannot be found; otherwise whatever
        #   SitemapFile#save returns.
        def execute
          unless Gitlab.com?
            return "The sitemap can only be generated for Gitlab.com"
          end

          # The group is memoized in an instance variable of the class itself
          # (we are inside `class << self`), so it would otherwise outlive this
          # call. Reset it so each run looks the group up again instead of
          # reusing a record that may have been deleted or made non-public.
          @gitlab_org_group = nil

          unless gitlab_org_group
            return "The group '#{GITLAB_ORG_NAMESPACE}' was not found"
          end

          file = Sitemaps::SitemapFile.new
          file.add_elements(generic_urls)
          file.add_elements(gitlab_org_group)
          file.add_elements(gitlab_org_subgroups)
          file.add_elements(gitlab_org_projects)
          file.save
        end

        private

        # Instance-wide pages that are not tied to any group.
        def generic_urls
          [
            explore_projects_url,
            explore_snippets_url,
            explore_groups_url
          ]
        end

        # Top-level, public group with path GITLAB_ORG_NAMESPACE, looked up
        # without a user. Memoized for the duration of one `execute` call.
        def gitlab_org_group
          @gitlab_org_group ||= GroupFinder.new(nil).execute(
            path: GITLAB_ORG_NAMESPACE,
            parent_id: nil,
            visibility_level: Gitlab::VisibilityLevel::PUBLIC
          )
        end

        # Descendant groups of the gitlab-org group, looked up without a user.
        # NOTE(review): presumably this restricts results to publicly visible
        # groups - confirm against GroupsFinder.
        def gitlab_org_subgroups
          GroupsFinder.new(
            nil,
            parent: gitlab_org_group,
            include_parent_descendants: true
          ).execute
        end

        # Non-archived projects of the group and its subgroups, looked up
        # without a user, with project features and routes included.
        def gitlab_org_projects
          GroupProjectsFinder.new(
            current_user: nil,
            group: gitlab_org_group,
            params: { non_archived: true },
            options: { include_subgroups: true }
          ).execute.include_project_feature.inc_routes
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module Sitemaps
    # Collects URLs and renders/saves them as a sitemap XML document.
    class SitemapFile
      # Location of the generated sitemap inside the public folder.
      SITEMAP_FILE_PATH = File.join(Rails.public_path, 'sitemap.xml').freeze

      # Accumulated URL entries. Each call to #add_elements appends one array,
      # so this is a list of (possibly nested) arrays that may contain nils;
      # the template flattens and compacts it when rendering.
      attr_accessor :urls

      def initialize
        @urls = []
      end

      # Converts the given elements to URLs via UrlExtractor and stores them.
      #
      # @param elements [Object] a single element or a collection of elements
      # @return [Array, nil] the accumulated urls, or nil when nothing was given
      def add_elements(elements = [])
        elements = Array(elements)
        return if elements.empty?

        # Non-destructive `map`: Array() returns the very same object when it
        # is given an Array, so `map!` would overwrite the caller's array.
        urls << elements.map { |element| Sitemaps::UrlExtractor.extract(element) }
      end

      # Writes the rendered sitemap to SITEMAP_FILE_PATH.
      # Does nothing (returns nil) when no URLs have been added.
      def save
        return if urls.empty?

        File.write(SITEMAP_FILE_PATH, render)
      end

      # Renders the sitemap by evaluating the builder template in the context
      # of this object.
      #
      # @return [Object] the value of the template's last expression
      def render
        # Start from a fresh builder: the memoized one keeps accumulating
        # output, so rendering twice would otherwise emit the document twice.
        @xml_builder = nil

        fragment_path = File.expand_path("fragments/sitemap_file.xml.builder", __dir__)

        # Passing the path makes errors raised by the template point at it.
        instance_eval(File.read(fragment_path), fragment_path)
      end

      private

      # Builder shared by all calls made from the template during one render.
      def xml_builder
        @xml_builder ||= Builder::XmlMarkup.new(indent: 2)
      end

      # Date (ISO 8601) used as <lastmod> for every entry.
      def lastmod
        @lastmod ||= Date.today.iso8601
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module Sitemaps
    # Turns sitemap elements (plain strings, groups, projects) into URLs.
    class UrlExtractor
      class << self
        include Gitlab::Routing

        # Returns the URL(s) for the given element.
        #
        # @param element [String, Group, Project, Object]
        # @return [String, Array<String>, nil] the string itself, the list of
        #   URLs for a group or project, or nil for any other kind of element
        def extract(element)
          case element
          when String then element
          when Group then extract_from_group(element)
          when Project then extract_from_project(element)
          end
        end

        # URLs listed for a group: its page, issues, merge requests, packages
        # and epics.
        def extract_from_group(group)
          [
            group_url(group),
            issues_group_url(group),
            merge_requests_group_url(group),
            group_packages_url(group),
            group_epics_url(group)
          ]
        end

        # URLs listed for a project: its page, issues and merge requests, plus
        # snippets and the wiki home page when those features are enabled.
        def extract_from_project(project)
          project_urls = [
            project_url(project),
            project_issues_url(project),
            project_merge_requests_url(project)
          ]

          project_urls << project_snippets_url(project) if project.snippets_enabled?
          project_urls << project_wiki_url(project, Wiki::HOMEPAGE) if project.wiki_enabled?

          project_urls
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Sitemaps::Generator do
  subject { described_class.execute }

  it 'returns error if the env is not .com' do
    expect(Gitlab).to receive(:com?).and_return(false)

    expect(subject).to eq "The sitemap can only be generated for Gitlab.com"
  end

  context 'when env is .com' do
    before do
      expect(Gitlab).to receive(:com?).and_return(true)
    end

    it 'returns error if group gitlab-org is not found' do
      expect(subject).to eq "The group 'gitlab-org' was not found"
    end

    context 'when group gitlab-org is present' do
      context 'and it is not public' do
        it 'returns an error' do
          create(:group, :internal, path: 'gitlab-org', name: "Gitlab Org Group")

          expect(subject).to eq "The group 'gitlab-org' was not found"
        end
      end

      context 'and it is public' do
        let_it_be(:gitlab_org_group) { create(:group, :public, path: 'gitlab-org', name: "Gitlab Org Group") }
        let_it_be(:public_gitlab_org_project) { create(:project, :public, namespace: gitlab_org_group) }
        let_it_be(:internal_gitlab_org_project) { create(:project, :internal, namespace: gitlab_org_group) }
        let_it_be(:private_gitlab_org_project) { create(:project, :private, namespace: gitlab_org_group) }
        let_it_be(:public_subgroup) { create(:group, :public, path: 'group1', name: 'group1', parent: gitlab_org_group) }
        let_it_be(:internal_subgroup) { create(:group, :internal, path: 'group2', name: 'group2', parent: gitlab_org_group) }
        let_it_be(:public_subgroup_public_project) { create(:project, :public, namespace: public_subgroup) }
        let_it_be(:public_subgroup_internal_project) { create(:project, :internal, namespace: public_subgroup) }
        let_it_be(:internal_subgroup_private_project) { create(:project, :private, namespace: internal_subgroup) }
        let_it_be(:internal_subgroup_internal_project) { create(:project, :internal, namespace: internal_subgroup) }

        it 'includes default explore routes and gitlab-org group routes' do
          # Redirect the output so the real public/sitemap.xml is not touched.
          new_path = Rails.root.join('tmp/tests/sitemap.xml')
          stub_const('Gitlab::Sitemaps::SitemapFile::SITEMAP_FILE_PATH', new_path)

          begin
            subject

            content = File.read(new_path)

            expect(content).to include('/explore/projects')
            expect(content).to include('/explore/groups')
            expect(content).to include('/explore/snippets')
            expect(content).to include(gitlab_org_group.full_path)
            expect(content).to include(public_subgroup.full_path)
            expect(content).to include(public_gitlab_org_project.full_path)
            expect(content).to include(public_subgroup_public_project.full_path)
            expect(content).not_to include(internal_gitlab_org_project.full_path)
            expect(content).not_to include(private_gitlab_org_project.full_path)
            expect(content).not_to include(internal_subgroup.full_path)
            expect(content).not_to include(public_subgroup_internal_project.full_path)
            expect(content).not_to include(internal_subgroup_private_project.full_path)
            expect(content).not_to include(internal_subgroup_internal_project.full_path)
          ensure
            # Remove the generated file even when an expectation above fails,
            # so a red run does not leave it behind for later examples.
            File.delete(new_path) if File.exist?(new_path)
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Sitemaps::SitemapFile do
  # A sitemap file holding a single, already-formed URL.
  subject do
    described_class.new.tap do |file|
      file.add_elements("https://gitlab.com")
    end
  end

  describe '#render' do
    it 'generates a valid sitemap file' do
      freeze_time do
        content = subject.render

        # The builder is created with `indent: 2`, so every nesting level of
        # the expected document is indented by two spaces. The squiggly
        # heredoc only strips the indentation common to all lines.
        expected_content = <<~EOS
          <?xml version="1.0" encoding="UTF-8"?>
          <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
            <url>
              <loc>https://gitlab.com</loc>
              <lastmod>#{Date.today.iso8601}</lastmod>
            </url>
          </urlset>
        EOS

        expect(content).to eq expected_content
      end
    end
  end

  describe '#save' do
    it 'returns if no elements has been provided' do
      expect(File).not_to receive(:write)

      described_class.new.save # rubocop: disable Rails/SaveBang
    end

    it 'stores the content in the public root folder' do
      expect(subject).to receive(:render).and_call_original

      expect(File).to receive(:write).with(
        File.join(Rails.public_path, 'sitemap.xml'),
        anything)

      subject.save # rubocop: disable Rails/SaveBang
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Sitemaps::UrlExtractor do
  before do
    stub_default_url_options(host: 'localhost')
  end

  describe '.extract' do
    subject { described_class.extract(element) }

    context 'when element is a string' do
      let(:element) { "https://gitlab.com" }

      it 'returns the string without any processing' do
        expect(subject).to eq element
      end
    end

    context 'when element is a group' do
      let(:element) { build(:group) }

      it 'calls .extract_from_group' do
        expect(described_class).to receive(:extract_from_group)

        subject
      end
    end

    context 'when element is a project' do
      let(:element) { build(:project) }

      it 'calls .extract_from_project' do
        expect(described_class).to receive(:extract_from_project)

        subject
      end
    end

    context 'when element is unknown' do
      let(:element) { build(:user) }

      it 'returns nil' do
        expect(subject).to be_nil
      end
    end
  end

  describe '.extract_from_group' do
    let(:group) { build(:group) }

    subject { described_class.extract_from_group(group) }

    it 'returns several group urls' do
      expected_urls = [
        "http://localhost/#{group.full_path}",
        "http://localhost/groups/#{group.full_path}/-/issues",
        "http://localhost/groups/#{group.full_path}/-/merge_requests",
        "http://localhost/groups/#{group.full_path}/-/packages",
        "http://localhost/groups/#{group.full_path}/-/epics"
      ]

      expect(subject).to match_array(expected_urls)
    end
  end

  describe '.extract_from_project' do
    let(:project) { build(:project) }

    subject { described_class.extract_from_project(project) }

    it 'returns several project urls' do
      expected_urls = [
        "http://localhost/#{project.full_path}",
        "http://localhost/#{project.full_path}/-/issues",
        "http://localhost/#{project.full_path}/-/merge_requests",
        "http://localhost/#{project.full_path}/-/snippets",
        "http://localhost/#{project.full_path}/-/wikis/home"
      ]

      expect(subject).to match_array(expected_urls)
    end

    context 'when wiki is disabled' do
      let(:project) { build(:project, :wiki_disabled) }

      it 'does not include wiki url' do
        # Must be the same wiki URL the enabled case produces; asserting on a
        # URL that is never generated would pass regardless of the feature.
        expect(subject).not_to include("http://localhost/#{project.full_path}/-/wikis/home")
      end
    end

    context 'when snippets are disabled' do
      let(:project) { build(:project, :snippets_disabled) }

      it 'does not include snippets url' do
        expect(subject).not_to include("http://localhost/#{project.full_path}/-/snippets")
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment