Give every article an ID to enable grouping changes by article

This commit is contained in:
Matěj Divecký 2023-08-17 11:19:12 +02:00
parent ac2ca35a56
commit 2c97d7ab69
5 changed files with 64 additions and 9 deletions

View file

@ -0,0 +1,44 @@
#!/usr/bin/python3
#
# Create a UID of the article in old articles where we don't have RSS UID and where we can't generate the article_id on the fly.
# It takes a while, but it's a one-shot.
#
import sqlite3
import hashlib
db_con = sqlite3.connect("../data/diffs.db")
db = db_con.cursor()
def create_article_id(uid, feed):
    """Return a stand-in unique ID used to reference an article in the database.

    The ID is the hex-encoded SHA-256 digest of ``str(uid)`` immediately
    followed by ``str(feed)``, encoded as UTF-8.

    Args:
        uid: First component of the ID; converted with ``str()``.
        feed: Second component of the ID; converted with ``str()``.

    Returns:
        A 64-character lowercase hexadecimal string.
    """
    # NOTE: the two parts are joined without a separator, so ("ab", "c") and
    # ("a", "bc") produce the same ID. Adding a separator would change every
    # ID computed so far, so the format is intentionally left as is.
    id_bytes = (str(uid) + str(feed)).encode('utf-8')
    return hashlib.sha256(id_bytes).hexdigest()
def update_diff(diff_id, article_id):
    """Write ``article_id`` onto the ``diffs`` row whose key is ``diff_id``.

    The statement runs on the module-level cursor ``db`` and is committed
    on ``db_con`` before returning.
    """
    db.execute(
        "UPDATE diffs SET article_id = ? WHERE diff_id = ?",
        (article_id, diff_id),
    )
    db_con.commit()
# Backfill only the rows that still lack an article_id. The earlier condition,
# NOT 'article_id', negated a string literal instead of the column; that is
# always true, so every row was selected and rewritten.
# Columns are named explicitly so the result does not depend on the position
# of article_id within the table.
db.execute(
    "SELECT diff_id, feed_name, article_url FROM diffs "
    "WHERE article_id IS NULL OR article_id = '' "
    "ORDER BY diff_id DESC"
)
for diff_id, feed_name, article_url in db.fetchall():
    # These rows have no RSS UID, so feed name + article URL stand in for it.
    article_id = create_article_id(feed_name, article_url)
    update_diff(diff_id, article_id)
    print(article_id)

View file

@ -6,6 +6,7 @@ import redis
import time
import json
import sqlite3
import hashlib
from diff_match_patch import diff_match_patch
@ -34,6 +35,7 @@ db = db_con.cursor()
db.executescript("""
CREATE TABLE IF NOT EXISTS diffs (
diff_id INTEGER PRIMARY KEY,
article_id TEXT,
feed_name TEXT NOT NULL,
article_url TEXT NOT NULL,
title_orig TEXT NOT NULL,
@ -84,8 +86,8 @@ def process_diff(old, new, rss_id):
# print(old['link'])
# print(diff)
sql = "INSERT INTO diffs(feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,datetime('now', 'localtime'))"
sql_data = (old['medium'], old['link'], old['title'], new['title'], html_diff)
# Seven columns need seven values: six bound parameters plus datetime() for diff_time.
sql = "INSERT INTO diffs(article_id, feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,?,datetime('now', 'localtime'))"
sql_data = (new['article_id'], old['medium'], old['link'], old['title'], new['title'], html_diff)
db.execute(sql, sql_data)
db_con.commit()
@ -108,7 +110,12 @@ def process_item(article, rc):
# Article is new, just create it and exit
write_article(article, rc)
def create_article_id(uid, feed):
    """Return a unique ID used to reference an article in the database.

    The ID is the hex-encoded SHA-256 digest of ``str(uid)`` (the RSS unique
    tag) immediately followed by ``str(feed)`` (the feed name), encoded as
    UTF-8.

    Args:
        uid: RSS unique tag of the article; converted with ``str()``.
        feed: Feed name; converted with ``str()``.

    Returns:
        A 64-character lowercase hexadecimal string.
    """
    # NOTE: the two parts are joined without a separator, so ("ab", "c") and
    # ("a", "bc") produce the same ID. Adding a separator would change every
    # ID computed so far, so the format is intentionally left as is.
    id_bytes = (str(uid) + str(feed)).encode('utf-8')
    return hashlib.sha256(id_bytes).hexdigest()
for feed in config['feeds']:
@ -123,11 +130,13 @@ for feed in config['feeds']:
try:
rss_id = item[unique_tag]
title = item['title']
article_id = create_article_id(rss_id, name)
#description = item['description'] ## Don't store description for now, as we don't need it and it's big.
published = time.strftime('%Y:%m:%d %H:%M:%S %Z %z', item['published_parsed'])
link = item['link']
article_data = {
'title' : title,
'article_id': article_id,
#'description': description,
'published' : published,
'link' : link,

View file

@ -80,13 +80,14 @@ def index():
)
@app.route("/article/<path:article_id>")
def article_detail(article_id: str):
    """Render all stored diffs whose ``article_id`` matches the URL segment.

    Returns a 404 response when no diff row carries that ID.
    """
    db = get_db().cursor()
    db.execute("SELECT * FROM diffs WHERE article_id = ?", (article_id,))
    result = db.fetchall()
    if not result:
        # Without this guard, result[0] below raises IndexError and the
        # request ends in a 500 instead of a 404.
        return "Article not found", 404
    # The page heading shows the URL taken from the first matching row
    # (presumably identical on every row of the same article).
    article_url = result[0]['article_url']
    return render_template("article_detail.html", article_id=article_id, article_url=article_url, diffs=result )
@app.route('/about')

View file

@ -2,7 +2,8 @@
{% block body %}
<div class="container">
<h1>Diffs for the article at {{ article_url }}</h1>
<h1>Diffs for the article at <a href="{{ article_url }}">{{ article_url|truncate(50) }}</a></h1>
<br>
<ol class="diffs-list">
{% for diff in diffs %}

View file

@ -35,7 +35,7 @@
<svg class="inline-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M10 6V8H5V19H16V14H18V20C18 20.5523 17.5523 21 17 21H4C3.44772 21 3 20.5523 3 20V7C3 6.44772 3.44772 6 4 6H10ZM21 3V11H19L18.9999 6.413L11.2071 14.2071L9.79289 12.7929L17.5849 5H13V3H21Z"></path></svg>
Display current article
</a>
<a class="changeset-action" href="/article/{{ diff.article_url }}">
<a class="changeset-action" href="/article/{{ diff.article_id }}">
<svg class="inline-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 22C6.47715 22 2 17.5228 2 12C2 6.47715 6.47715 2 12 2C17.5228 2 22 6.47715 22 12C22 17.5228 17.5228 22 12 22ZM12 20C16.4183 20 20 16.4183 20 12C20 7.58172 16.4183 4 12 4C7.58172 4 4 7.58172 4 12C4 16.4183 7.58172 20 12 20ZM13 12H17V14H11V7H13V12Z"></path></svg>
Show change history
</a>