-
Notifications
You must be signed in to change notification settings - Fork 130
/
Copy pathrecap_references_generator.rb
147 lines (130 loc) · 6.6 KB
/
recap_references_generator.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# frozen_string_literal: true
# Regex pattern to match a podcast reference mark of the form
# `{% assign timestamp="xx:xx:xx" %}`.
# Whitespace inside the tag is flexible; capture group 1 holds the text between
# the double quotes (the timestamp).
$podcast_reference_mark = /\{%\s*assign\s+timestamp\s*=\s*"([^"]+)"\s*%\}/
# Create the podcast recap references by parsing the referenced newsletter for
# podcast reference marks (timestamps)
class RecapReferencesGenerator < Jekyll::Generator
  # Generator entry point.
  #
  # For every document whose `type` is "podcast" and that declares a
  # `reference` URL, this:
  # - rewrites the content of the referenced document so that every podcast
  #   reference mark becomes a link to the podcast page,
  # - stores the collected references in `podcast.data["references"]`,
  # - lists the headers of title-less ("special") sections in
  #   `podcast.data["special_sections"]`,
  # - prefixes matching transcript segment titles in the podcast content with
  #   an anchor.
  #
  # Raises a RuntimeError when a podcast references a URL that no document has.
  def generate(site)
    podcast_pages = site.documents.select { |doc| doc.data["type"] == "podcast" }
    podcast_pages.each do |podcast|
      # podcast episodes have a "reference" field that indicates the related newsletter page
      reference_url = podcast.data["reference"]
      next if reference_url.nil?

      reference_page = site.documents.find { |page| page.url == reference_url }
      if reference_page.nil?
        # fail with an explicit message instead of a NoMethodError on nil below
        raise "RecapReferencesGenerator: podcast #{podcast.url} references " \
              "#{reference_url.inspect}, but no document has that URL"
      end

      # override the content of the reference page (newsletter) to now include
      # the links to the related podcast items, and keep all the references in
      # a podcast page variable to use them later during the podcast page creation
      reference_page.content, podcast.data["references"] =
        get_podcast_references(reference_page.content, podcast.url)

      # we use this in `newsletter-references.md` to be easier to identify
      # special sections when iterating through the sections of the newsletter
      podcast.data["special_sections"] = []

      podcast.data["references"].each do |reference|
        if reference["title"].nil?
          # the title of a reference derives from the nested list items
          # under a header/section (News, Releases and release candidates, etc.)
          # if there are no list items, we end up with a missing title
          # we use this assumption to identify special sections
          podcast.data["special_sections"] << reference["header"]
          # use the header as the title of the section
          reference["title"] = reference["header"]
          reference["slug"] = generate_slug(reference["header"])
        end

        # Each podcast transcript splits into segments using the paragraph title
        # as the title of the segment. These segment splits must be added manually but
        # we can avoid the need to also manually add their anchors by doing that here,
        # where we effectively search for the segment splits and prefix them with the anchor.
        # `sub!` returns nil when nothing was replaced, so the stored value is
        # truthy only when a matching segment title was found and anchored.
        reference["has_transcript_section"] =
          podcast.content.sub!(
            /^(_.*?#{Regexp.escape(reference["title"])}.*?_)/,
            "{:#{reference["slug"]}-transcript}\n \\1"
          )
      end
    end
  end

  # Extracts the title of a paragraph.
  #
  # string  - the paragraph text to search
  # in_list - when true (default) the title must follow a list-item dash;
  #           pass false for the review club section, which is not a list item
  #
  # Returns {"title"=> ..., "slug"=> ...} when a non-empty title is found,
  # otherwise an empty hash.
  def find_title(string, in_list=true)
    # this conditional prefix is for the special case of the review club section
    # which is not a list item (no dash (-) at the start of the line)
    prefix = in_list ? / *- / : //
    # Find shortest match for **bold**, or [markdown][links]
    # note: when we are matching the title in `auto-anchor.rb` we also match *italics*
    # but on the newsletter sections nested bullets have *italics* titles therefore
    # by ignoring *italics* we are able to easier link to the outer title
    match = string.match(/^#{prefix}(?:\*\*(.*?):?\*\*|\[(.*?):?\][(\[])/)
    # only one of the two alternatives captures, so take the first non-nil group
    title = match&.captures&.compact&.first
    return {} if title.nil? || title.empty?

    { "title" => title, "slug" => generate_slug(title) }
  end

  # This method searches the content for paragraphs that indicate that they are
  # part of a podcast recap. When a paragraph is part of a recap we:
  # - replace the podcast reference mark with a link to the related podcast item
  # - get the header, title and title slug of the paragraph to create
  #   the references for the podcast
  #
  # content         - the markdown content to scan
  # target_page_url - URL of the podcast page the generated links point to
  #
  # Returns a two-element array: the updated content (paragraphs re-joined with
  # a single empty line) and the array of reference hashes with the keys
  # "header" and "timestamp" plus, when known, "title", "slug" and "podcast_slug".
  def get_podcast_references(content, target_page_url)
    # The logic here assumes that:
    # - paragraphs have headers
    # - each block of text (paragraph) is separated by an empty line

    # Split the content into paragraphs
    paragraphs = content.split(/\n\n+/)
    # Find all the headers in the content
    headers = content.scan(/^#+\s+(.*)$/).flatten

    podcast_references = []
    current_header = 0
    current_title = {}
    in_review_club_section = false

    # Iterate over all paragraphs to find those with a podcast reference mark
    paragraphs.each do |paragraph|
      # a title might have multiple paragraphs associated with it
      # the podcast reference mark might be at the end of an isolated
      # paragraph snippet that cannot access the title, therefore
      # we keep this information to be used in the link to the podcast recap
      title = find_title(paragraph, !in_review_club_section)
      current_title = title unless title.empty?

      # If the current paragraph contains the podcast reference mark,
      # capture the timestamp, add paragraph to references and replace
      # the mark with link to the related podcast item
      paragraph.gsub!($podcast_reference_mark) do
        # read the capture first, before anything else can touch the match data
        timestamp = Regexp.last_match(1)

        if in_review_club_section
          # the newsletter's review club section is the only section that does
          # not have a list item to use as anchor so we use the header
          current_title["podcast_slug"] = "#pr-review-club" # to avoid duplicate anchor
          current_title["slug"] = "#bitcoin-core-pr-review-club"
        end

        podcast_references <<
          { "header" => headers[current_header], "timestamp" => timestamp }.merge(current_title)

        if current_title.empty?
          # this is needed for the podcast reference mark to link to the header
          # of the special section; it is set after the reference was recorded,
          # so the recorded reference itself stays without a title
          current_title["slug"] = generate_slug(headers[current_header])
        end

        # Replace the whole match with the link
        headphones_link = "[<i class='fa fa-headphones' title='Listen to our discussion of this on the podcast'></i>]"
        "#{headphones_link}(#{target_page_url}#{current_title["podcast_slug"] || current_title["slug"]})"
      end

      # update to the next header when we parse through it; there is no next
      # header after the last one (or when the content has no headers at all),
      # in which case we simply stay where we are
      next_header = headers[current_header + 1]
      if next_header && paragraph.sub(/^#+\s*/, "") == next_header
        current_header += 1
        in_review_club_section = headers[current_header] == "Bitcoin Core PR Review Club"
        # reset header-specific variables
        current_title = {}
      end
    end

    # Join the paragraphs back together to return the modified content
    updated_content = paragraphs.join("\n\n")
    [updated_content, podcast_references]
  end
end