diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..6757a0e --- /dev/null +++ b/.editorconfig @@ -0,0 +1,15 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.py] +indent_style = tab +indent_size = 4 + +[*.{html,css}] +indent_style = tab +indent_size = 2 diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..c659b65 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.associations": { + "*.html": "jinja-html" + } +} diff --git a/docker-compose.yml b/docker-compose.yml index 4b8a496..65ba04c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,7 +4,7 @@ services: build: ./view/ command: python app.py ports: - - "5000:5000" + - "5050:5000" volumes: - ./view:/app - ./data:/data diff --git a/misc/article_id_generator.py b/misc/article_id_generator.py new file mode 100644 index 0000000..ab9e445 --- /dev/null +++ b/misc/article_id_generator.py @@ -0,0 +1,44 @@ +#!/usr/bin/python3 + +# +# Create a UID of the article in old articles where we don't have RSS UID and where we can't generate the article_id on the fly. +# It takes a while, but it's a one-shot. +# + +import sqlite3 +import hashlib + +db_con = sqlite3.connect("../data/diffs.db") +db = db_con.cursor() + + + + + + + +def create_article_id(uid, feed): + # Create a fake unique ID from RSS unique tag and feed name to reference the article in database + id_string = str(uid) + str(feed) + id_bytes = id_string.encode('utf-8') + article_id = hashlib.sha256(id_bytes).hexdigest() + return(article_id) + + +def update_diff(diff_id, article_id): + sql = "UPDATE diffs SET article_id = ? WHERE diff_id = ?" 
+ sql_data = (article_id, diff_id) + db.execute(sql, sql_data) + db_con.commit() + + + +db.execute( + "SELECT * FROM diffs WHERE article_id IS NULL ORDER BY diff_id DESC ", # only rows that still lack an article_id +) +diffs = db.fetchall() + +for diff in diffs: + article_id = create_article_id(diff[1], diff[2]) + update_diff(diff[0], article_id) + print(article_id) \ No newline at end of file diff --git a/processor/app.py b/processor/app.py index 58c5bac..bb06ba7 100644 --- a/processor/app.py +++ b/processor/app.py @@ -6,6 +6,7 @@ import redis import time import json import sqlite3 +import hashlib from diff_match_patch import diff_match_patch @@ -34,6 +35,7 @@ db = db_con.cursor() db.executescript(""" CREATE TABLE IF NOT EXISTS diffs ( diff_id INTEGER PRIMARY KEY, + article_id TEXT, feed_name TEXT NOT NULL, article_url TEXT NOT NULL, title_orig TEXT NOT NULL, @@ -84,8 +86,8 @@ def process_diff(old, new, rss_id): # print(old['link']) # print(diff) - sql = "INSERT INTO diffs(feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,datetime('now', 'localtime'))" - sql_data = (old['medium'], old['link'], old['title'], new['title'], html_diff) + sql = "INSERT INTO diffs(article_id, feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,?,datetime('now', 'localtime'))" # one "?" per sql_data item; diff_time is computed by SQLite + sql_data = (new['article_id'], old['medium'], old['link'], old['title'], new['title'], html_diff) db.execute(sql, sql_data) db_con.commit() @@ -108,7 +110,12 @@ def process_item(article, rc): # Article is new, just create it and exit write_article(article, rc) - +def create_article_id(uid, feed): + # Create a unique ID from RSS unique tag and feed name to reference the article in database + id_string = str(uid) + str(feed) + id_bytes = id_string.encode('utf-8') + article_id = hashlib.sha256(id_bytes).hexdigest() + return(article_id) for feed in config['feeds']: @@ -123,11 +130,13 @@ for feed in config['feeds']: try: rss_id = item[unique_tag] title = item['title'] + article_id = 
create_article_id(rss_id, name) #description = item['description'] ## Don't store description for now, as we don't need it and it's big. published = time.strftime('%Y:%m:%d %H:%M:%S %Z %z', item['published_parsed']) link = item['link'] article_data = { 'title' : title, + 'article_id': article_id, #'description': description, 'published' : published, 'link' : link, diff --git a/view/app.py b/view/app.py index 079fadf..8604279 100644 --- a/view/app.py +++ b/view/app.py @@ -56,8 +56,8 @@ def index(): page = request.args.get(get_page_parameter(), type=int, default=1) pagination = Pagination(page=page, total=diff_count, record_name='diffs', css_framework='bootstrap5') - - + + page_skip = pagination.skip per_page = pagination.per_page if query: @@ -80,6 +80,19 @@ def index(): ) +@app.route("/article/<article_id>") +def article_detail(article_id: str): + """Render every recorded headline diff for one article, or answer 404 if none exist.""" + db = get_db().cursor() + db.execute("SELECT * FROM diffs WHERE article_id = ?", (article_id,)) + result = db.fetchall() + if not result: + # Unknown article_id: answer 404 instead of raising IndexError on result[0]. + return "Article not found", 404 + article_url = result[0]['article_url'] 
+ return render_template("article_detail.html", article_id=article_id, article_url=article_url, diffs=result ) + + @app.route('/about') def about(): return render_template('about.html') diff --git a/view/static/main.css b/view/static/main.css index e69de29..d301f6f 100644 --- a/view/static/main.css +++ b/view/static/main.css @@ -0,0 +1,372 @@ +/* Global */ + +:root { + --border-color: hsl(0 0% 80% / 60%); + --accent-color: hsl(225 90% 50%); + --accent-color-pressed: hsl(225 90% 35%); + --color-muted: hsl(0 0% 50%); + --radius-s: 0.25em; + --radius-m: 0.5em; + --font-size-m: 1rem; + --font-size-s: 0.85rem; + --font-size-xs: 0.75rem; + --font-size-l: 1.25rem; + --box-shadow: 0 2px 0 hsl(0 0% 50% / 20%); +} + +html { + box-sizing: border-box; + font-family: "Inter", system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", + Roboto, Oxygen, Ubuntu, Cantarell, "Open Sans", "Helvetica Neue", sans-serif; +} + +body { + min-height: 100vh; + display: flex; + flex-direction: column; + align-items: stretch; + line-height: 1.5; + background-color: hsl(0 0% 98%); +} + +*, +*::before, +*::after { + box-sizing: inherit; +} + +* { + margin: 0; +} + +body { + line-height: 1.5; +} + +img, +picture, +video, +canvas, +svg { + display: block; + max-width: 100%; +} + +input, +button, +textarea, +select { + font: inherit; +} + +p, +h1, +h2, +h3, +h4, +h5, +h6 { + overflow-wrap: break-word; + font-size: inherit; + font-weight: inherit; +} + +table, +th, +tr, +td { + text-align: inherit; + border-collapse: collapse; +} + +a { + color: var(--accent-color); + font-weight: 500; + text-decoration: none; +} + +a:hover { + color: var(--accent-color-pressed); +} + +ins { + background-color: hsl(120 100% 95%); + text-decoration-color: hsl(120 50% 75% / 50%); +} + +del { + background-color: hsl(0 100% 95%); + text-decoration-color: hsl(0 50% 40% / 50%); +} + +code { + font-size: inherit; +} + +summary { + cursor: pointer; + list-style: none; + display: flex; + align-items: center; + gap: 0.5em; +} 
+ +summary::-webkit-details-marker { + display: none; +} + +summary::before { + content: url('data:image/svg+xml,'); + display: grid; + place-content: center; + transition: transform 120ms; +} + +details[open] summary::before { + transform: rotate(90deg); +} + +.header { + padding-top: 1rem; + padding-bottom: 1rem; + background-color: white; +} + +.header:not(.header-extended) { + border-bottom: 1px solid var(--border-color); + margin-bottom: 2rem; +} + +.header .container { + display: flex; + align-items: center; + gap: 2rem; +} + +.header nav { + display: flex; + align-items: center; + gap: 1rem; +} + +.header-link-home { + color: inherit; + text-decoration: none; +} + +.header h1 { + font-size: 1.5rem; + font-weight: 700; +} + +.header h1 .del { + text-decoration: line-through; + text-decoration-thickness: 2px; +} + +.header h1 .ins { + text-decoration: underline; + text-decoration-thickness: 1px; + text-decoration-style: wavy; +} + +.main { + margin-bottom: auto; +} + +.footer { + margin-top: 4rem; + padding: 1.5rem 0; + color: hsl(0 0% 60%); +} + +.footer a { + color: inherit; +} + +.footer-container { + display: flex; + align-items: center; + gap: 0.5rem; +} + +.footer-nologo { + display: inline-block; + fill: hsl(0 0% 60%); + transition: fill 120ms; +} + +.footer-nologo:hover { + fill: hsl(0 0% 40%); +} + +.container { + max-width: 1200px; + margin: 0 auto; + padding: 0 2rem; +} + +.input { + border-radius: var(--radius-s); + border: 1px solid var(--border-color); + padding: 0.375rem 0.75rem; + font-size: var(--font-size-m); + transition: border-color 120ms, box-shadow 120ms; +} + +.input:focus { + border-color: var(--accent-color); + box-shadow: 0 0 0 2px hsl(225 90% 40% / 50%); + outline: none; +} + +.button { + display: inline-flex; + align-items: center; + gap: 0.5em; + border-radius: var(--radius-s); + border: 1px solid var(--border-color); + padding: 0.375rem 0.75rem; + background: hsl(0 0% 95%); + font-size: var(--font-size-m); + transition: 
background-color 120ms; + color: hsl(0 0% 40%); + font-weight: 500; + font-size: 0.9em; + line-height: 1.5rem; +} + +.button:not(:disabled) { + cursor: pointer; +} + +.button:hover { + background: hsl(0 0% 90%); +} + +.card { + border: 1px solid var(--border-color); + border-radius: var(--radius-m); + background-color: white; + box-shadow: 0 2px 0 hsl(0 0% 50% / 20%); + overflow: hidden; +} + +.pagination { + list-style-type: none; + padding: 0; + display: flex; + margin-bottom: 1rem; +} + +.page-link { + display: block; + padding: 0.5em 1em; + text-decoration: none; + color: inherit; +} + +.page-item { + border: 1px solid var(--border-color); + background: hsl(0 0% 95%); + transition: background-color 120ms; + color: hsl(0 0% 40%); + font-weight: 500; + font-size: 0.85em; + line-height: 1.5rem; +} + +.page-item:not(.active):hover { + background: hsl(0 0% 90%); +} + +.page-item:first-of-type { + border-radius: var(--radius-s) 0 0 var(--radius-s); +} + +.page-item:last-of-type { + border-radius: 0 var(--radius-s) var(--radius-s) 0; +} + +.page-item:not(:last-of-type) { + border-right-width: 0px; +} + +.page-item.active { + background-color: var(--accent-color); + color: white; + border-color: transparent; +} + +.pagination-page-info { + color: var(--color-muted); +} + +.prose p:not(:last-of-type) { + margin-bottom: 1rem; +} + +/* Index */ + +.filters { + margin-bottom: 2rem; + position: sticky; + top: 0; + background-color: white; + padding: 0.75rem 0; + border-bottom: 1px solid var(--border-color); +} + +.changesets { + margin-bottom: 2rem; +} + +.changeset { + padding: 1rem 1.5rem; + margin-bottom: 1rem; +} + +.changeset-actions { + display: flex; + gap: 1rem; + margin-bottom: 0.75rem; +} + +.changeset-feed-name, +.changeset-time, +.changeset-action { + font-weight: 500; + font-size: var(--font-size-s); +} + +.changeset-action { + display: inline-flex; + align-items: center; + gap: 0.5em; + font-weight: 500; + color: var(--color-muted); + text-decoration: none; + 
transition: color 200ms; +} + +.changeset-action:first-of-type { + margin-left: 0.5em; +} + +.changeset-title { + font-size: var(--font-size-l); +} + +.inline-icon { + display: inline-block; + fill: currentColor; + width: 1.25em; +} + +.changeset details[open] summary { + margin-bottom: 1rem; +} + +.changeset table th { + padding-right: 1rem; +} diff --git a/view/templates/about.html b/view/templates/about.html index d906a45..641e292 100644 --- a/view/templates/about.html +++ b/view/templates/about.html @@ -1,42 +1,32 @@ - - +{% extends "base.html" %} - - {% include 'parts/head.html' %} - +{% endblock head %} -html { - position: relative; - min-height: 100%; - } - -body { - margin-bottom: 60px; /* Margin bottom by footer height */ - } - -.footer { - position: absolute; - bottom: 0; - width: 100%; - } - - - - - - -
-
-

Headliner

-

Headliner is monitoring rss feeds of czech news websites for changes in article headlines. Just - because it might be interesting.

-

See the source code, but be aware that it's not too nice. - Feel free to improve it.

-

If you want to access the raw data collected by this tool, you can download the full archive from our git

-
-
- - {% include 'parts/footer.html' %} - - - \ No newline at end of file +{% block body %} +
+
+

+ Headliner is monitoring RSS feeds of Czech news websites for changes in + article headlines. Just because it might be interesting. +

+

+ Check out the source code, + but be aware that it's not too nice. Feel free to improve it or run the tool yourself. +

+ +

+ If you want to access the raw data collected by this tool, you can + download the full archive from our git repo. +

+ +

Ondřej and Bain made important contributions to the project. Thank you!

+
+
+{% endblock body %} diff --git a/view/templates/article_detail.html b/view/templates/article_detail.html new file mode 100644 index 0000000..82367d0 --- /dev/null +++ b/view/templates/article_detail.html @@ -0,0 +1,47 @@ +{% extends "base.html" %} + +{% block head %} + +{% endblock head %} + +{% block body %} +
+

Diffs for the article at {{ article_url|truncate(50) }}

+
+ +
+ + {% for diff in diffs %} + + + + + {% endfor %} +
{{ diff.diff_time }}{{ diff.diff_html|safe }}
+
+
+{% endblock body %} diff --git a/view/templates/base.html b/view/templates/base.html new file mode 100644 index 0000000..1e72205 --- /dev/null +++ b/view/templates/base.html @@ -0,0 +1,41 @@ + + + + + + Headliner + + + + {% block head %}{% endblock %} + + +
+ +
+
+ {% block body %}{% endblock %} +
+ {% include "parts/footer.html" %} + + diff --git a/view/templates/feeds.html b/view/templates/feeds.html index 8c15342..dd4457c 100644 --- a/view/templates/feeds.html +++ b/view/templates/feeds.html @@ -1,35 +1,27 @@ - - +{% extends "base.html" %} - - {% include 'parts/head.html' %} - +{% block body %} +
+
+ + + + + + + + + + {% for feed in feeds %} + + + + - - -
-
-
NameRSS/Atom URLUnique tag
{{ feed.feed_name }}{{ feed.rss_source | urlize(target="_blank") }}{{ feed.unique_tag }}
- - - - - - - - - {% for feed in feeds %} - - - - - - {% endfor %} - -
NameRSS/Atom URLUnique tag
{{ feed.feed_name }}{{ feed.rss_source | urlize(target="_blank") }}{{ feed.unique_tag }}
-
+ {% endfor %} + + +
- {% include 'parts/footer.html' %} - - - \ No newline at end of file + +{% endblock body %} diff --git a/view/templates/index.html b/view/templates/index.html index 4dea652..edc7d0a 100644 --- a/view/templates/index.html +++ b/view/templates/index.html @@ -1,80 +1,68 @@ - - +{% extends "base.html" %} - - {% include 'parts/head.html' %} +{% block header_class %}header-extended{% endblock header_class %} - - - - - -
-
-
- - -
-
-
- - - - - - - - - - - - {% for diff in diffs %} - - - - - - - - {% endfor %} - -
Detection timeSourceDiffOriginalChanged
{{ diff.diff_time }}{{ diff.feed_name }}{{ diff.diff_html|safe }} - {{ diff.title_orig|truncate(15) }} - {{ diff.title_orig }} - - {{ diff.title_new|truncate(15) }} - {{ diff.title_new}} -
-
-
-
-
- {{ pagination.links }} -
-
- {{ pagination.info }} -
-
-
-
- {% include 'parts/footer.html' %} - - - +
+ {{ pagination.links }} + {{ pagination.info }} +
+{% endblock body %} diff --git a/view/templates/parts/footer.html b/view/templates/parts/footer.html index 64bac23..6ddc5f9 100644 --- a/view/templates/parts/footer.html +++ b/view/templates/parts/footer.html @@ -1,5 +1,16 @@ - \ No newline at end of file + diff --git a/view/templates/parts/head.html b/view/templates/parts/head.html deleted file mode 100644 index 50739f8..0000000 --- a/view/templates/parts/head.html +++ /dev/null @@ -1,7 +0,0 @@ - - -Headliner - - - - \ No newline at end of file diff --git a/view/templates/parts/header.html b/view/templates/parts/header.html new file mode 100644 index 0000000..9cb7cbe --- /dev/null +++ b/view/templates/parts/header.html @@ -0,0 +1,13 @@ +
+ +