import datetime
import email.utils
import json
import os.path
import random
import urllib
import urllib.parse
from typing import Optional

import marko
import marko.inline

from .warko.strikethrough import StrikeThrough
from .warko.headinginjector import get_heading_injector
from .warko.extendedimage import get_image_renderer, ExtendedImageElement
from .warko.newtablink import get_new_tab_links
from ..config import Config
# Names of the two files read from every post directory.
CONTENT_FILE_NAME = "content.md"
META_FILE_NAME = "meta.json"
# Keys looked up in the dictionary loaded from META_FILE_NAME.
TITLE_KEY = "title"
PUBLISH_DATE_KEY = "publish_date"
TAGS_KEY = "tags"
THUMBNAIL_KEY = "thumbnail"
INTRO_KEY = "intro"
# Length (in characters) at which an automatically extracted intro is cut.
INTRO_DESIRED_LENGTH = 320
# Threshold on the distance between the cut point and the last period, used
# when deciding how to end a shortened intro.
INTRO_MAX_EXTRA_LENGTH = 100
# Module-level cache of the loaded posts; get_posts() clears and refills it.
posts_cache: list['Post'] = []


def get_posts_from_cache() -> list['Post']:
    """Return a shallow copy of the cached list of posts.

    A copy is returned so callers can reorder or pop entries without
    modifying the shared cache.

    Raises:
        RuntimeError: if the cache is still empty (no posts were loaded yet).
    """
    if not posts_cache:
        raise RuntimeError("Getting posts list from cache while it is still empty.")
    return posts_cache.copy()
class Post:
    """A single blog post loaded from a directory on disk.

    The directory must contain ``content.md`` (the Markdown body) and
    ``meta.json`` (metadata such as title, publish date and tags, plus an
    optional thumbnail and intro). Every other ``*.md`` file in the directory
    is rendered as a subpage; all remaining entries are recorded as extra
    files.
    """

    # Shared Markdown parser/renderer configured with the blog's extensions.
    PARSER = marko.Markdown(
        renderer=marko.HTMLRenderer,
        extensions=[
            StrikeThrough,
            get_heading_injector("> "),
            get_image_renderer(["blog.wazul.moe"]),
            get_new_tab_links(["blog.wazul.moe"]),
        ],
    )

    def __init__(self, path: str):
        """Load, parse and render the post stored in the directory *path*.

        Raises:
            OSError: if the content or metadata file cannot be read.
            json.JSONDecodeError: if the metadata file is not valid JSON.
            KeyError: if the metadata has no publish date (needed for href).
        """
        self.path = path
        self.name = os.path.basename(path)

        # Read explicitly as UTF-8 so the result does not depend on the locale.
        with open(os.path.join(path, CONTENT_FILE_NAME), encoding="utf-8") as f:
            self.content = Post.PARSER.parse(f.read())
        self.html = Post.PARSER.render(self.content)

        with open(os.path.join(path, META_FILE_NAME), encoding="utf-8") as f:
            self.meta_data = json.load(f)

        # (name without ".md", rendered HTML) for every additional Markdown file.
        self.subpages = []
        # Names of all other entries found in the post directory.
        self.extra_files = []
        for elem in os.listdir(path):
            if elem in (CONTENT_FILE_NAME, META_FILE_NAME):
                continue
            if elem.endswith(".md"):
                with open(os.path.join(path, elem), encoding="utf-8") as f:
                    subpage_content = Post.PARSER.parse(f.read())
                self.subpages.append(
                    (os.path.basename(elem)[:-3], Post.PARSER.render(subpage_content))
                )
            else:
                self.extra_files.append(elem)

        self.href = f"/posts/{self.get_publish_year()}/{self.name}"

        if THUMBNAIL_KEY in self.meta_data:
            thumbnail = self.meta_data[THUMBNAIL_KEY]
        else:
            thumbnail = Post._get_first_image_path(self.content)
        # NOTE(review): the thumbnail from meta.json is assumed to be relative
        # to the post directory, like image paths in the content - confirm.
        # A post without any image has no thumbnail; never build ".../None".
        self.thumbnail = f"{self.href}/{thumbnail}" if thumbnail is not None else None

        if INTRO_KEY in self.meta_data:
            self.intro = self.meta_data[INTRO_KEY]
        else:
            # The extracted text is already shortened by the helper.
            self.intro = Post._extract_first_paragraph_text(self.content)

    def title(self) -> str:
        """Return the post title from the metadata."""
        return self.meta_data[TITLE_KEY]

    def get_tags(self) -> str:
        """Return one formatted entry per tag, joined by single spaces.

        NOTE(review): the format string below contains a single placeholder,
        so only ``Config.BLOG_ROOT_URL`` is emitted for each tag and both
        ``tag`` arguments are ignored. The original template (presumably some
        markup around the tag) appears to have been lost - restore it.
        """
        return " ".join(["{}".format(Config.BLOG_ROOT_URL, tag, tag) for tag in
                         self.meta_data[TAGS_KEY]])

    def get_link(self) -> str:
        """Return the post URL: ``href`` joined onto ``Config.BLOG_ROOT_URL``."""
        return urllib.parse.urljoin(Config.BLOG_ROOT_URL, self.href)

    def get_publish_time(self) -> str:
        """Return the publish date string exactly as stored in the metadata."""
        return self.meta_data[PUBLISH_DATE_KEY]

    def get_publish_time_rfc2822(self) -> str:
        """Return the publish date as an RFC 2822 date string.

        The metadata only holds a date, so a fixed time of 12:00 at UTC+02:00
        is appended before parsing.
        """
        return email.utils.format_datetime(
            datetime.datetime.fromisoformat(self.get_publish_time() + " 12:00+02:00")
        )

    def get_publish_year(self) -> str:
        """Return the first four characters of the publish date (the year)."""
        return self.meta_data[PUBLISH_DATE_KEY][0:4]

    def get_fake_path(self) -> str:
        """Return the shell-style path shown for this post (``~/posts/<year>/<name>``)."""
        return "~/posts/{}/{}".format(self.get_publish_year(), self.name)

    def get_prompt(self, cmd: str) -> str:
        """Return the prompt from ``Config.get_prompt`` for *cmd* at the post's fake path."""
        return Config.get_prompt(self.get_fake_path(), cmd)

    def get_cat_prompt(self, file: str) -> str:
        """Return the prompt for a ``cat <file>`` command at the post's fake path."""
        return self.get_prompt(f"cat {file}")

    def get_index_prompt(self) -> str:
        """Return the prompt for a ``head`` of this post's content, issued from ``~``."""
        return Config.get_prompt("~", f"head {self.get_fake_path()}/content")

    def get_similar_posts(self) -> list['Post']:
        """Return up to five other posts to suggest next to this one.

        The post following this one in the cache is added first; since
        ``get_posts`` sorts newest first, that is the previously published
        post. The remaining slots are filled with a random selection of posts
        located close to this one in the cache.

        Raises:
            Exception: if the post cache is empty.
            ValueError: if this post is not part of the cache.
        """
        posts = get_posts_from_cache()
        idx_self = posts.index(self)
        # TODO move to config
        MAX_SIMILAR_POST_COUNT = 5
        POSTS_AROUND_DISTANCE = 5

        ret_list = []
        # add the previous post
        if idx_self < len(posts) - 1:
            ret_list.append(posts.pop(idx_self + 1))

        # TODO add some tagbased search when I have more content
        # fallback: add random posts from around the current post
        # Clamp the start: a negative index would make the slice count from
        # the end of the list and miss the neighbourhood (and self) entirely.
        start = max(0, idx_self - POSTS_AROUND_DISTANCE)
        posts_around = [
            post
            for post in posts[start:idx_self + POSTS_AROUND_DISTANCE]
            if post is not self
        ]
        free_slots = MAX_SIMILAR_POST_COUNT - len(ret_list)
        ret_list.extend(random.sample(posts_around, min(free_slots, len(posts_around))))
        return ret_list

    def get_similar_posts_ls(self) -> str:
        """Return the similar-posts listing (currently a placeholder using this post three times)."""
        # TODO fix
        return self.generate_similar_posts_ls([self, self, self])

    @staticmethod
    def generate_similar_posts_ls(other_posts: list['Post']) -> str:
        """Return a listing of *other_posts* styled like ``ls -l`` output of symlinks.

        The first line is ``total 0``; each following line shows one post with
        its title as the link name and its fake path as the target.
        """
        lines = ["total 0"]
        for post in other_posts:
            # TODO random time and fix filename escape
            lines.append("lrwxrwxrwx 1 {} {} {} {} 11:11 '{}' -> {}".format(
                Config.BLOG_OWNER, Config.BLOG_OWNER, len(post.get_fake_path()),
                post.get_publish_time(), post.title(), post.get_fake_path())
            )
        return "\n".join(lines)

    @staticmethod
    def _truncate_intro(text: str) -> str:
        """Shorten *text* to about INTRO_DESIRED_LENGTH characters.

        Text that is already short enough is returned unchanged. Otherwise it
        is cut at INTRO_DESIRED_LENGTH; if a period lies within
        INTRO_MAX_EXTRA_LENGTH characters before the cut, the text ends at
        that period, else "..." is appended to the cut text.
        """
        if len(text) <= INTRO_DESIRED_LENGTH:
            return text
        text = text[:INTRO_DESIRED_LENGTH]  # cut to length
        # first try to cut at the last period, if it's not too far...
        last_dot_pos = text.rfind(".")
        if last_dot_pos != -1 and INTRO_DESIRED_LENGTH - last_dot_pos <= INTRO_MAX_EXTRA_LENGTH:
            return text[:last_dot_pos + 1]
        # no period close enough: mark the cut with an ellipsis instead
        return text + "..."

    @staticmethod
    def _extract_first_paragraph_text(root) -> str:
        """Return the shortened plain text of the first paragraph below *root*.

        Only raw text, line breaks (turned into a single space) and the raw
        text directly inside links are collected. Returns an empty string
        when *root* has no paragraph child.
        """
        for child in root.children:
            if not isinstance(child, marko.block.Paragraph):
                continue
            paragraph_str = ""
            for part in child.children:
                if isinstance(part, marko.inline.RawText):
                    if isinstance(part.children, str):
                        paragraph_str += part.children
                elif isinstance(part, marko.inline.LineBreak):
                    # avoid doubling the space when the text already ends with one
                    if paragraph_str[-1:] != " ":
                        paragraph_str += " "
                elif isinstance(part, marko.inline.Link):
                    for part_child in part.children:
                        if isinstance(part_child, marko.inline.RawText) and isinstance(part_child.children, str):
                            paragraph_str += part_child.children
            # return after the first paragraph
            return Post._truncate_intro(paragraph_str)
        return ""

    @staticmethod
    def _get_first_image_path(root) -> Optional[str]:
        """Return the path of the first image found depth-first below *root*.

        Returns ``None`` when the tree contains no image element.
        """
        if isinstance(root, marko.inline.Image):
            return root.dest
        if isinstance(root, ExtendedImageElement):
            return root.src
        if hasattr(root, 'children'):
            for elm in root.children:
                img = Post._get_first_image_path(elm)
                if img:
                    return img
        return None
def get_posts(path: str) -> list[Post]:
    """Load every post below *path* and refresh the module-level post cache.

    Each sub-directory of *path* is loaded as one ``Post``. The posts are
    sorted by publish date, newest first; the cache is replaced with that
    list and the list is returned.
    """
    return_list = []
    for entry in os.listdir(path):
        post_dir = os.path.join(path, entry)
        # Only directories can hold a post; skip stray files (for example
        # OS or editor metadata) instead of failing while opening them.
        if os.path.isdir(post_dir):
            return_list.append(Post(post_dir))
    return_list.sort(key=lambda post: post.meta_data[PUBLISH_DATE_KEY], reverse=True)
    posts_cache.clear()
    posts_cache.extend(return_list)
    return return_list