Migrate code style to Black
This commit is contained in:
parent
986dd93550
commit
6cccea04fb
5 changed files with 189 additions and 165 deletions
|
@ -7,7 +7,7 @@ insert_final_newline = true
|
||||||
trim_trailing_whitespace = true
|
trim_trailing_whitespace = true
|
||||||
|
|
||||||
[*.py]
|
[*.py]
|
||||||
indent_style = tab
|
indent_style = space
|
||||||
indent_size = 4
|
indent_size = 4
|
||||||
|
|
||||||
[*.{html,css}]
|
[*.{html,css}]
|
||||||
|
|
4
.vscode/settings.json
vendored
4
.vscode/settings.json
vendored
|
@ -1,5 +1,9 @@
|
||||||
{
|
{
|
||||||
"files.associations": {
|
"files.associations": {
|
||||||
"*.html": "jinja-html"
|
"*.html": "jinja-html"
|
||||||
|
},
|
||||||
|
"[python]": {
|
||||||
|
"editor.defaultFormatter": "ms-python.black-formatter",
|
||||||
|
"editor.formatOnSave": true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,33 +12,27 @@ db_con = sqlite3.connect("../data/diffs.db")
|
||||||
db = db_con.cursor()
|
db = db_con.cursor()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def create_article_id(uid, feed):
|
def create_article_id(uid, feed):
|
||||||
# Create a fake unique ID from RSS unique tag and feed name to reference the article in database
|
# Create a fake unique ID from RSS unique tag and feed name to reference the article in database
|
||||||
id_string = str(uid) + str(feed)
|
id_string = str(uid) + str(feed)
|
||||||
id_bytes = id_string.encode('utf-8')
|
id_bytes = id_string.encode("utf-8")
|
||||||
article_id = hashlib.sha256(id_bytes).hexdigest()
|
article_id = hashlib.sha256(id_bytes).hexdigest()
|
||||||
return(article_id)
|
return article_id
|
||||||
|
|
||||||
|
|
||||||
def update_diff(diff_id, article_id):
|
def update_diff(diff_id, article_id):
|
||||||
sql = "UPDATE diffs SET article_id = ? WHERE diff_id = ?"
|
sql = "UPDATE diffs SET article_id = ? WHERE diff_id = ?"
|
||||||
sql_data = (article_id, diff_id)
|
sql_data = (article_id, diff_id)
|
||||||
db.execute(sql, sql_data)
|
db.execute(sql, sql_data)
|
||||||
db_con.commit()
|
db_con.commit()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
db.execute(
|
db.execute(
|
||||||
"SELECT * FROM diffs WHERE NOT 'article_id' ORDER BY diff_id DESC ",
|
"SELECT * FROM diffs WHERE NOT 'article_id' ORDER BY diff_id DESC ",
|
||||||
)
|
)
|
||||||
diffs = db.fetchall()
|
diffs = db.fetchall()
|
||||||
|
|
||||||
for diff in diffs:
|
for diff in diffs:
|
||||||
article_id = create_article_id(diff[1], diff[2])
|
article_id = create_article_id(diff[1], diff[2])
|
||||||
update_diff(diff[0], article_id)
|
update_diff(diff[0], article_id)
|
||||||
print(article_id)
|
print(article_id)
|
||||||
|
|
161
processor/app.py
161
processor/app.py
|
@ -14,25 +14,26 @@ from diff_match_patch import diff_match_patch
|
||||||
#
|
#
|
||||||
# Idea block:
|
# Idea block:
|
||||||
#
|
#
|
||||||
# Můžeme zkusit ke každému ID článku přidat taky hash obsahu, s tím že v začátku budeme kontrolovat jenom změnu hashe a až pak obsah stringů.
|
# Můžeme zkusit ke každému ID článku přidat taky hash obsahu, s tím že v začátku budeme kontrolovat jenom změnu hashe a až pak obsah stringů.
|
||||||
# Ale nevím jestli to bude reálně efektivnější
|
# Ale nevím jestli to bude reálně efektivnější
|
||||||
#
|
#
|
||||||
# Teď budeme kontrolovat jen titulky, ale postupně můžeme přidat i description článku
|
# Teď budeme kontrolovat jen titulky, ale postupně můžeme přidat i description článku
|
||||||
|
|
||||||
CONFIG_FILE = "../data/config.yaml"
|
CONFIG_FILE = "../data/config.yaml"
|
||||||
REDIS_ARTICLE_EXPIRE_SEC = 604800
|
REDIS_ARTICLE_EXPIRE_SEC = 604800
|
||||||
|
|
||||||
config = confuse.Configuration('headline', __name__)
|
config = confuse.Configuration("headline", __name__)
|
||||||
config.set_file(CONFIG_FILE)
|
config.set_file(CONFIG_FILE)
|
||||||
|
|
||||||
dmp = diff_match_patch()
|
dmp = diff_match_patch()
|
||||||
|
|
||||||
rc = redis.Redis(host='redis', port=6379, db=0)
|
rc = redis.Redis(host="redis", port=6379, db=0)
|
||||||
|
|
||||||
db_con = sqlite3.connect("../data/diffs.db")
|
db_con = sqlite3.connect("../data/diffs.db")
|
||||||
db = db_con.cursor()
|
db = db_con.cursor()
|
||||||
|
|
||||||
db.executescript("""
|
db.executescript(
|
||||||
|
"""
|
||||||
PRAGMA journal_mode=WAL;
|
PRAGMA journal_mode=WAL;
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS diffs (
|
CREATE TABLE IF NOT EXISTS diffs (
|
||||||
|
@ -74,90 +75,100 @@ CREATE TRIGGER IF NOT EXISTS diffs_aupdate AFTER UPDATE ON diffs
|
||||||
INSERT INTO diffs_fts (rowid, title_orig, title_new)
|
INSERT INTO diffs_fts (rowid, title_orig, title_new)
|
||||||
VALUES (new.diff_id, new.title_orig, new.title_new);
|
VALUES (new.diff_id, new.title_orig, new.title_new);
|
||||||
END;
|
END;
|
||||||
""")
|
"""
|
||||||
|
)
|
||||||
article_count = 0
|
article_count = 0
|
||||||
|
|
||||||
|
|
||||||
def write_article(article, rc):
|
def write_article(article, rc):
|
||||||
rval = json.dumps(article['content'])
|
rval = json.dumps(article["content"])
|
||||||
rc.set(article['rss_id'], rval, ex=REDIS_ARTICLE_EXPIRE_SEC)
|
rc.set(article["rss_id"], rval, ex=REDIS_ARTICLE_EXPIRE_SEC)
|
||||||
|
|
||||||
|
|
||||||
def process_diff(old, new, rss_id):
|
def process_diff(old, new, rss_id):
|
||||||
diff = dmp.diff_main(old['title'], new['title'])
|
diff = dmp.diff_main(old["title"], new["title"])
|
||||||
dmp.diff_cleanupSemantic(diff)
|
dmp.diff_cleanupSemantic(diff)
|
||||||
html_diff = dmp.diff_prettyHtml(diff)
|
html_diff = dmp.diff_prettyHtml(diff)
|
||||||
# print(old['link'])
|
# print(old['link'])
|
||||||
# print(diff)
|
# print(diff)
|
||||||
|
|
||||||
sql = "INSERT INTO diffs(article_id, feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,?,datetime('now', 'localtime'))"
|
sql = "INSERT INTO diffs(article_id, feed_name, article_url, title_orig, title_new, diff_html, diff_time) VALUES (?,?,?,?,?,?,datetime('now', 'localtime'))"
|
||||||
sql_data = (new['article_id'], old['medium'], old['link'], old['title'], new['title'], html_diff)
|
sql_data = (
|
||||||
db.execute(sql, sql_data)
|
new["article_id"],
|
||||||
db_con.commit()
|
old["medium"],
|
||||||
|
old["link"],
|
||||||
|
old["title"],
|
||||||
|
new["title"],
|
||||||
|
html_diff,
|
||||||
|
)
|
||||||
|
db.execute(sql, sql_data)
|
||||||
|
db_con.commit()
|
||||||
|
|
||||||
return(True)
|
return True
|
||||||
|
|
||||||
|
|
||||||
def process_item(article, rc):
|
def process_item(article, rc):
|
||||||
if rc.exists(article['rss_id']):
|
if rc.exists(article["rss_id"]):
|
||||||
old = json.loads(rc.get(article['rss_id']))
|
old = json.loads(rc.get(article["rss_id"]))
|
||||||
new = article['content']
|
new = article["content"]
|
||||||
if old['title'] != new['title']:
|
if old["title"] != new["title"]:
|
||||||
# print('Article changed. World is fucked.')
|
# print('Article changed. World is fucked.')
|
||||||
diff = process_diff(old, new, article['rss_id'])
|
diff = process_diff(old, new, article["rss_id"])
|
||||||
write_article(article, rc)
|
write_article(article, rc)
|
||||||
return(True)
|
return True
|
||||||
else:
|
else:
|
||||||
# Article is the same. All good!
|
# Article is the same. All good!
|
||||||
return(True)
|
return True
|
||||||
else:
|
else:
|
||||||
# Article is new, just create it and exit
|
# Article is new, just create it and exit
|
||||||
write_article(article, rc)
|
write_article(article, rc)
|
||||||
|
|
||||||
|
|
||||||
def create_article_id(uid, feed):
|
def create_article_id(uid, feed):
|
||||||
# Create a unique ID from RSS unique tag and feed name to reference the article in database
|
# Create a unique ID from RSS unique tag and feed name to reference the article in database
|
||||||
id_string = str(uid) + str(feed)
|
id_string = str(uid) + str(feed)
|
||||||
id_bytes = id_string.encode('utf-8')
|
id_bytes = id_string.encode("utf-8")
|
||||||
article_id = hashlib.sha256(id_bytes).hexdigest()
|
article_id = hashlib.sha256(id_bytes).hexdigest()
|
||||||
return(article_id)
|
return article_id
|
||||||
|
|
||||||
|
|
||||||
for feed in config['feeds']:
|
for feed in config["feeds"]:
|
||||||
try:
|
try:
|
||||||
rss_source = str(feed['rss_source'])
|
rss_source = str(feed["rss_source"])
|
||||||
unique_tag = str(feed['unique_tag'])
|
unique_tag = str(feed["unique_tag"])
|
||||||
name = str(feed['name'])
|
name = str(feed["name"])
|
||||||
|
|
||||||
rss = feedparser.parse(rss_source)
|
rss = feedparser.parse(rss_source)
|
||||||
|
|
||||||
for item in rss['entries']:
|
for item in rss["entries"]:
|
||||||
try:
|
try:
|
||||||
rss_id = item[unique_tag]
|
rss_id = item[unique_tag]
|
||||||
title = item['title']
|
title = item["title"]
|
||||||
article_id = create_article_id(rss_id, name)
|
article_id = create_article_id(rss_id, name)
|
||||||
#description = item['description'] ## Don't store description for now, as we don't need it and it's big.
|
# description = item['description'] ## Don't store description for now, as we don't need it and it's big.
|
||||||
published = time.strftime('%Y:%m:%d %H:%M:%S %Z %z', item['published_parsed'])
|
published = time.strftime(
|
||||||
link = item['link']
|
"%Y:%m:%d %H:%M:%S %Z %z", item["published_parsed"]
|
||||||
article_data = {
|
)
|
||||||
'title' : title,
|
link = item["link"]
|
||||||
'article_id': article_id,
|
article_data = {
|
||||||
#'description': description,
|
"title": title,
|
||||||
'published' : published,
|
"article_id": article_id,
|
||||||
'link' : link,
|
#'description': description,
|
||||||
'medium' : name
|
"published": published,
|
||||||
}
|
"link": link,
|
||||||
article = {
|
"medium": name,
|
||||||
'rss_id' : rss_id,
|
}
|
||||||
'content' : article_data
|
article = {"rss_id": rss_id, "content": article_data}
|
||||||
}
|
article_count += 1
|
||||||
article_count += 1
|
process_item(article, rc)
|
||||||
process_item(article, rc)
|
except Exception as e:
|
||||||
except Exception as e:
|
print("Parsing article failed")
|
||||||
print("Parsing article failed")
|
print(e)
|
||||||
print(e)
|
print(item)
|
||||||
print(item)
|
except Exception as e:
|
||||||
except Exception as e:
|
print("Parsing feed failed.")
|
||||||
print("Parsing feed failed.")
|
print(e)
|
||||||
print(e)
|
print(feed)
|
||||||
print(feed)
|
pass
|
||||||
pass
|
|
||||||
|
|
||||||
print("Processed articles: " + str(article_count))
|
print("Processed articles: " + str(article_count))
|
||||||
|
|
153
view/app.py
153
view/app.py
|
@ -10,7 +10,7 @@ import re
|
||||||
DATABASE = "../data/diffs.db"
|
DATABASE = "../data/diffs.db"
|
||||||
CONFIG_FILE = "../data/config.yaml"
|
CONFIG_FILE = "../data/config.yaml"
|
||||||
|
|
||||||
config = confuse.Configuration('headline', __name__)
|
config = confuse.Configuration("headline", __name__)
|
||||||
config.set_file(CONFIG_FILE)
|
config.set_file(CONFIG_FILE)
|
||||||
|
|
||||||
|
|
||||||
|
@ -18,104 +18,119 @@ app = Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
def get_db():
|
def get_db():
|
||||||
db = getattr(g, '_database', None)
|
db = getattr(g, "_database", None)
|
||||||
if db is None:
|
if db is None:
|
||||||
db = g._database = sqlite3.connect(DATABASE)
|
db = g._database = sqlite3.connect(DATABASE)
|
||||||
db.row_factory = sqlite3.Row
|
db.row_factory = sqlite3.Row
|
||||||
return db
|
return db
|
||||||
|
|
||||||
|
|
||||||
@app.teardown_appcontext
|
@app.teardown_appcontext
|
||||||
def close_connection(exception):
|
def close_connection(exception):
|
||||||
db = getattr(g, '_database', None)
|
db = getattr(g, "_database", None)
|
||||||
if db is not None:
|
if db is not None:
|
||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
|
|
||||||
def websearch_to_fts_query(search: str):
|
def websearch_to_fts_query(search: str):
|
||||||
"""
|
"""
|
||||||
Converts web searches into fts queries:
|
Converts web searches into fts queries:
|
||||||
'this is "a test"' -> '"this" OR "is" OR "a test"'
|
'this is "a test"' -> '"this" OR "is" OR "a test"'
|
||||||
"""
|
"""
|
||||||
return ' OR '.join(['"'+m.group(0)+'"' for m in re.finditer(r'(?<=")[^"]+(?=")|[^\s"]+', search)])
|
return " OR ".join(
|
||||||
|
[
|
||||||
|
'"' + m.group(0) + '"'
|
||||||
|
for m in re.finditer(r'(?<=")[^"]+(?=")|[^\s"]+', search)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.route('/')
|
@app.route("/")
|
||||||
def index():
|
def index():
|
||||||
db = get_db().cursor()
|
db = get_db().cursor()
|
||||||
|
|
||||||
search = request.args.get("search", type=str, default="")
|
search = request.args.get("search", type=str, default="")
|
||||||
query = websearch_to_fts_query(search) if search else None
|
query = websearch_to_fts_query(search) if search else None
|
||||||
|
|
||||||
# View options
|
# View options
|
||||||
expand_diffs = request.args.get("expand_diffs") is not None
|
expand_diffs = request.args.get("expand_diffs") is not None
|
||||||
|
|
||||||
db.execute(f"SELECT count(*) FROM diffs{'_fts(?)' if query else ''}", (query,) if query else ())
|
db.execute(
|
||||||
|
f"SELECT count(*) FROM diffs{'_fts(?)' if query else ''}",
|
||||||
|
(query,) if query else (),
|
||||||
|
)
|
||||||
|
|
||||||
diff_count = db.fetchall()[0][0]
|
diff_count = db.fetchall()[0][0]
|
||||||
|
|
||||||
|
# flask-paginate
|
||||||
|
page = request.args.get(get_page_parameter(), type=int, default=1)
|
||||||
|
|
||||||
#flask-paginate
|
pagination = Pagination(
|
||||||
page = request.args.get(get_page_parameter(), type=int, default=1)
|
page=page, total=diff_count, record_name="diffs", css_framework="bootstrap5"
|
||||||
|
)
|
||||||
|
|
||||||
pagination = Pagination(page=page, total=diff_count, record_name='diffs', css_framework='bootstrap5')
|
page_skip = pagination.skip
|
||||||
|
per_page = pagination.per_page
|
||||||
|
if query:
|
||||||
|
db.execute(
|
||||||
|
"SELECT * FROM diffs JOIN (SELECT rowid FROM diffs_fts(?)) filter ON filter.rowid = diffs.diff_id ORDER BY diff_id DESC LIMIT ? OFFSET ?",
|
||||||
|
(query, per_page, page_skip),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
db.execute(
|
||||||
|
"SELECT * FROM diffs ORDER BY diff_id DESC LIMIT ? OFFSET ?",
|
||||||
|
(per_page, page_skip),
|
||||||
|
)
|
||||||
|
diffs = db.fetchall()
|
||||||
|
|
||||||
|
return render_template(
|
||||||
page_skip = pagination.skip
|
"./index.html",
|
||||||
per_page = pagination.per_page
|
diffs=diffs,
|
||||||
if query:
|
page=page,
|
||||||
db.execute(
|
pagination=pagination,
|
||||||
"SELECT * FROM diffs JOIN (SELECT rowid FROM diffs_fts(?)) filter ON filter.rowid = diffs.diff_id ORDER BY diff_id DESC LIMIT ? OFFSET ?",
|
diff_count=diff_count,
|
||||||
(query,per_page,page_skip)
|
search=search,
|
||||||
)
|
expand_diffs=expand_diffs,
|
||||||
else:
|
)
|
||||||
db.execute(
|
|
||||||
"SELECT * FROM diffs ORDER BY diff_id DESC LIMIT ? OFFSET ?",
|
|
||||||
(per_page,page_skip)
|
|
||||||
)
|
|
||||||
diffs = db.fetchall()
|
|
||||||
|
|
||||||
return render_template('./index.html',
|
|
||||||
diffs=diffs,
|
|
||||||
page=page,
|
|
||||||
pagination=pagination,
|
|
||||||
diff_count = diff_count,
|
|
||||||
search=search,
|
|
||||||
expand_diffs=expand_diffs,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/article/<path:article_id>")
|
@app.route("/article/<path:article_id>")
|
||||||
def article_detail(article_id: str):
|
def article_detail(article_id: str):
|
||||||
db = get_db().cursor()
|
db = get_db().cursor()
|
||||||
db.execute("SELECT * FROM diffs WHERE article_id = ?", (article_id,))
|
db.execute("SELECT * FROM diffs WHERE article_id = ?", (article_id,))
|
||||||
result = db.fetchall()
|
result = db.fetchall()
|
||||||
article_url = result[0]['article_url']
|
article_url = result[0]["article_url"]
|
||||||
# TODO: Handle if nothing is found and return 404 in that case.
|
# TODO: Handle if nothing is found and return 404 in that case.
|
||||||
return render_template("article_detail.html", article_id=article_id, article_url=article_url, diffs=result )
|
return render_template(
|
||||||
|
"article_detail.html",
|
||||||
|
article_id=article_id,
|
||||||
|
article_url=article_url,
|
||||||
|
diffs=result,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.route('/about')
|
@app.route("/about")
|
||||||
def about():
|
def about():
|
||||||
return render_template('about.html')
|
return render_template("about.html")
|
||||||
|
|
||||||
|
|
||||||
@app.route('/feeds')
|
@app.route("/feeds")
|
||||||
def feed_list():
|
def feed_list():
|
||||||
feeds = []
|
feeds = []
|
||||||
for conf in config['feeds']:
|
for conf in config["feeds"]:
|
||||||
feed = {
|
feed = {
|
||||||
'rss_source' : str(conf['rss_source']),
|
"rss_source": str(conf["rss_source"]),
|
||||||
'unique_tag' : str(conf['unique_tag']),
|
"unique_tag": str(conf["unique_tag"]),
|
||||||
'feed_name' : str(conf['name'])
|
"feed_name": str(conf["name"]),
|
||||||
}
|
}
|
||||||
feeds.append(feed)
|
feeds.append(feed)
|
||||||
return render_template('feeds.html', feeds=feeds)
|
return render_template("feeds.html", feeds=feeds)
|
||||||
|
|
||||||
|
|
||||||
@app.route('/robots.txt')
|
@app.route("/robots.txt")
|
||||||
def static_from_root():
|
def static_from_root():
|
||||||
return send_from_directory(app.static_folder, request.path[1:])
|
return send_from_directory(app.static_folder or "static", request.path[1:])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app.run(host="0.0.0.0")
|
app.run(host="0.0.0.0")
|
||||||
|
|
Loading…
Reference in a new issue