diff --git a/misc/article_id_generator.py b/misc/article_id_generator.py
new file mode 100644
index 0000000..ab9e445
--- /dev/null
+++ b/misc/article_id_generator.py
@@ -0,0 +1,44 @@
+#!/usr/bin/python3
+
+#
+# Create a UID of the article in old articles where we don't have RSS UID and where we can't generate the article_id on the fly.
+# It takes a while, but it's a one-shot.
+#
+
+import sqlite3
+import hashlib
+
+db_con = sqlite3.connect("../data/diffs.db")
+db = db_con.cursor()
+
+
+def create_article_id(uid, feed):
+    """Return the SHA-256 hex digest of str(uid) + str(feed), used as a fake unique article ID."""
+    id_string = str(uid) + str(feed)
+    id_bytes = id_string.encode('utf-8')
+    article_id = hashlib.sha256(id_bytes).hexdigest()
+    return(article_id)
+
+
+def update_diff(diff_id, article_id):
+    """Store article_id on the diffs row identified by diff_id and commit."""
+    sql = "UPDATE diffs SET article_id = ? WHERE diff_id = ?"
+    sql_data = (article_id, diff_id)
+    db.execute(sql, sql_data)
+    db_con.commit()
+
+
+# Backfill only the rows that have no article_id yet. The column has to be
+# tested with IS NULL; the quoted form 'article_id' is a string literal, not
+# the column. Columns are named explicitly so the values fed to the hash do
+# not depend on where article_id sits in the table.
+db.execute(
+    "SELECT diff_id, feed_name, article_url FROM diffs WHERE article_id IS NULL ORDER BY diff_id DESC ",
+)
+diffs = db.fetchall()
+
+for diff_id, feed_name, article_url in diffs:
+    # Old rows carry no RSS UID, so the ID is derived from feed name + URL.
+    article_id = create_article_id(feed_name, article_url)
+    update_diff(diff_id, article_id)
+    print(article_id)
\ No newline at end of file
diff --git a/processor/app.py b/processor/app.py
index 58c5bac..bb06ba7 100644
--- a/processor/app.py
+++ b/processor/app.py
@@ -6,6 +6,7 @@ import redis
 import time
 import json
 import sqlite3
+import hashlib
 
 from diff_match_patch import diff_match_patch
 
@@ -34,6 +35,7 @@ db = db_con.cursor()
 db.executescript("""
     CREATE TABLE IF NOT EXISTS diffs (
         diff_id INTEGER PRIMARY KEY,
+        article_id TEXT,
         feed_name TEXT NOT NULL,
         article_url TEXT NOT NULL,
         title_orig TEXT NOT NULL,
@@ -84,8 +86,8 @@ def process_diff(old, new, rss_id):
     # print(old['link'])
     # print(diff)
 
-    sql = "INSERT INTO diffs(feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,datetime('now', 'localtime'))"
-    sql_data = (old['medium'], old['link'], old['title'], new['title'], html_diff)
+    sql = "INSERT INTO diffs(article_id, feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,?,datetime('now', 'localtime'))"
+    sql_data = (new['article_id'], old['medium'], old['link'], old['title'], new['title'], html_diff)
     db.execute(sql, sql_data)
     db_con.commit()
 
@@ -108,7 +110,12 @@ def process_item(article, rc):
         # Article is new, just create it and exit
         write_article(article, rc)
 
-
+def create_article_id(uid, feed):
+    # Create a unique ID from RSS unique tag and feed name to reference the article in database
+    id_string = str(uid) + str(feed)
+    id_bytes = id_string.encode('utf-8')
+    article_id = hashlib.sha256(id_bytes).hexdigest()
+    return(article_id)
 
 
 for feed in config['feeds']:
@@ -123,11 +130,13 @@ for feed in config['feeds']:
         try:
             rss_id = item[unique_tag]
             title = item['title']
+            article_id = create_article_id(rss_id, name)
             #description = item['description'] ## Don't store description for now, as we don't need it and it's big.
             published = time.strftime('%Y:%m:%d %H:%M:%S %Z %z', item['published_parsed'])
             link = item['link']
             article_data = {
                 'title' : title,
+                'article_id': article_id,
                 #'description': description,
                 'published' : published,
                 'link' : link,
diff --git a/view/app.py b/view/app.py
index 61042fe..8604279 100644
--- a/view/app.py
+++ b/view/app.py
@@ -80,13 +80,16 @@ def index():
     )
 
 
-@app.route("/article/")
-def article_detail(article_url: str):
+@app.route("/article/<article_id>")
+def article_detail(article_id: str):
     db = get_db().cursor()
-    db.execute("SELECT * FROM diffs WHERE article_url = ?", (article_url,))
+    db.execute("SELECT * FROM diffs WHERE article_id = ?", (article_id,))
     result = db.fetchall()
-    # TODO: Handle if nothing is found and return 404 in that case.
-    return render_template("article_detail.html", article_url=article_url, diffs=result )
+    # An unknown article_id yields no rows; answer 404 instead of raising IndexError.
+    if not result:
+        return "Article not found", 404
+    article_url = result[0]['article_url']
+    return render_template("article_detail.html", article_id=article_id, article_url=article_url, diffs=result )
 
 
 @app.route('/about')
diff --git a/view/templates/article_detail.html b/view/templates/article_detail.html
index 13daf54..214168e 100644
--- a/view/templates/article_detail.html
+++ b/view/templates/article_detail.html
@@ -2,7 +2,8 @@
 {% block body %}
-

Diffs for the article at {{ article_url }}

+

Diffs for the article at {{ article_url|truncate(50) }}

+
    {% for diff in diffs %} diff --git a/view/templates/index.html b/view/templates/index.html index 029bd9e..1534c08 100644 --- a/view/templates/index.html +++ b/view/templates/index.html @@ -35,7 +35,7 @@ Display current article - + Show change history