import datetime
import email.utils
import json
import os.path
import random
import urllib
import urllib.parse
from typing import Optional

import marko
import marko.inline

from .warko.strikethrough import StrikeThrough
from .warko.headinginjector import get_heading_injector
from .warko.extendedimage import get_image_renderer, ExtendedImageElement
from .warko.newtablink import get_new_tab_links
from ..config import Config

# File names expected inside every post directory.
CONTENT_FILE_NAME = "content.md"
META_FILE_NAME = "meta.json"

# Keys read from the post's meta.json.
TITLE_KEY = "title"
PUBLISH_DATE_KEY = "publish_date"
TAGS_KEY = "tags"
THUMBNAIL_KEY = "thumbnail"
INTRO_KEY = "intro"

# Auto-generated intros are cut to INTRO_DESIRED_LENGTH characters; a sentence
# end is used as the cut point when it lies within INTRO_MAX_EXTRA_LENGTH
# characters of that limit.
INTRO_DESIRED_LENGTH = 320
INTRO_MAX_EXTRA_LENGTH = 100

# Filled by get_posts(); read through get_posts_from_cache().
posts_cache = []


def get_posts_from_cache() -> list['Post']:
    """Return a shallow copy of the cached post list.

    Raises:
        Exception: if the cache is empty, i.e. get_posts() has not filled it.
    """
    if not posts_cache:
        raise Exception("Getting posts list from cache while it is still empty.")
    return posts_cache.copy()


class Post:
    """A single blog post loaded from a directory on disk.

    The directory must contain ``content.md`` and ``meta.json``. Every other
    ``*.md`` file is rendered as a subpage; every remaining entry is recorded
    as an extra file.
    """

    # Shared markdown parser/renderer used for the content and all subpages.
    PARSER = marko.Markdown(
        renderer=marko.HTMLRenderer,
        extensions=[
            StrikeThrough,
            get_heading_injector("> "),
            get_image_renderer(["blog.wazul.moe"]),
            get_new_tab_links(["blog.wazul.moe"]),
        ],
    )

    def __init__(self, path: str):
        """Load, parse and render the post stored in the directory *path*."""
        self.path = path
        self.name = os.path.basename(path)

        with open(os.path.join(path, CONTENT_FILE_NAME), encoding="utf-8") as f:
            self.content = Post.PARSER.parse(f.read())
        self.html = Post.PARSER.render(self.content)

        with open(os.path.join(path, META_FILE_NAME), encoding="utf-8") as f:
            self.meta_data = json.load(f)

        # (subpage name without ".md", rendered HTML) tuples
        self.subpages = []
        # names of all other entries in the post directory
        self.extra_files = []
        for elem in os.listdir(path):
            if elem in (CONTENT_FILE_NAME, META_FILE_NAME):
                continue
            if elem.endswith(".md"):
                with open(os.path.join(path, elem), encoding="utf-8") as f:
                    subpage_content = Post.PARSER.parse(f.read())
                self.subpages.append(
                    (os.path.basename(elem)[:-3], Post.PARSER.render(subpage_content))
                )
            else:
                self.extra_files.append(elem)

        self.href = f"/posts/{self.get_publish_year()}/{self.name}"

        if THUMBNAIL_KEY in self.meta_data:
            self.thumbnail = self.meta_data[THUMBNAIL_KEY]
        else:
            self.thumbnail = Post._get_first_image_path(self.content)
        # NOTE(review): the prefix is applied to both the meta-provided and the
        # auto-detected thumbnail. When the content has no image at all the
        # result ends in "/None" - confirm how templates should handle that.
        self.thumbnail = f"{self.href}/{self.thumbnail}"

        if INTRO_KEY in self.meta_data:
            self.intro = self.meta_data[INTRO_KEY]
        else:
            # The helper already shortens the text; no second pass is needed.
            self.intro = Post._extract_first_paragraph_text(self.content)

    def title(self) -> str:
        """Return the post title from the meta data."""
        return self.meta_data[TITLE_KEY]

    def get_tags(self) -> str:
        """Return one formatted entry per tag, joined by single spaces."""
        # NOTE(review): the format string has a single placeholder, so only
        # Config.BLOG_ROOT_URL is emitted and both tag arguments are ignored.
        # This looks unintended - confirm the intended markup.
        return " ".join(
            "{}".format(Config.BLOG_ROOT_URL, tag, tag)
            for tag in self.meta_data[TAGS_KEY]
        )

    def get_link(self):
        """Return the post URL: ``href`` joined onto ``Config.BLOG_ROOT_URL``."""
        return urllib.parse.urljoin(Config.BLOG_ROOT_URL, self.href)

    def get_publish_time(self) -> str:
        """Return the publish date string exactly as stored in the meta data."""
        return self.meta_data[PUBLISH_DATE_KEY]

    def get_publish_time_rfc2822(self) -> str:
        """Return the publish date formatted by ``email.utils.format_datetime``.

        The stored date gets a fixed time of 12:00 at UTC+02:00 appended
        before it is parsed.
        """
        publish_dt = datetime.datetime.fromisoformat(
            self.get_publish_time() + " 12:00+02:00"
        )
        return email.utils.format_datetime(publish_dt)

    def get_publish_year(self) -> str:
        """Return the first four characters of the publish date string."""
        return self.meta_data[PUBLISH_DATE_KEY][0:4]

    def get_fake_path(self) -> str:
        """Return the shell-style path displayed for this post."""
        return "~/posts/{}/{}".format(self.get_publish_year(), self.name)

    def get_prompt(self, cmd: str) -> str:
        """Return the prompt for *cmd* located at the post's fake path."""
        return Config.get_prompt(self.get_fake_path(), cmd)

    def get_cat_prompt(self, file: str) -> str:
        """Return the prompt for running ``cat`` on *file*."""
        return self.get_prompt(f"cat {file}")

    def get_index_prompt(self) -> str:
        """Return the prompt shown for this post on the index page."""
        return Config.get_prompt("~", f"head {self.get_fake_path()}/content")

    def get_similar_posts(self) -> list['Post']:
        """Return up to five posts related to this one.

        The post that follows this one in the cached list (the cache is sorted
        newest first, so that is the previously published post) comes first,
        followed by randomly chosen posts from the neighbourhood of this post.

        Raises:
            Exception: if the post cache is empty.
            ValueError: if this post is not in the cache.
        """
        posts = get_posts_from_cache()
        idx_self = posts.index(self)

        # TODO move to config
        MAX_SIMILAR_POST_COUNT = 5
        POSTS_AROUND_DISTANCE = 5

        ret_list = []

        # add the previous post
        if idx_self < len(posts) - 1:
            ret_list.append(posts.pop(idx_self + 1))

        # TODO add some tagbased search when I have more content

        # fallback: add random posts from around the current post
        # Clamp the start: a negative start index would wrap around to the end
        # of the list and produce a window that does not contain this post.
        window_start = max(0, idx_self - POSTS_AROUND_DISTANCE)
        posts_around = posts[window_start:idx_self + POSTS_AROUND_DISTANCE]
        posts_around.remove(self)

        pick_count = min(MAX_SIMILAR_POST_COUNT - len(ret_list), len(posts_around))
        ret_list.extend(random.sample(posts_around, pick_count))

        return ret_list

    def get_similar_posts_ls(self) -> str:
        """Return the ``ls``-style listing of similar posts."""
        # TODO fix
        return self.generate_similar_posts_ls([self, self, self])

    @staticmethod
    def generate_similar_posts_ls(other_posts: list['Post']) -> str:
        """Render *other_posts* as an ``ls -l`` style listing of symlinks."""
        lines = ["total 0"]
        for post in other_posts:
            # TODO random time and fix filename escape
            lines.append(
                "lrwxrwxrwx 1 {} {} {} {} 11:11 '{}' -> {}".format(
                    Config.BLOG_OWNER,
                    Config.BLOG_OWNER,
                    len(post.get_fake_path()),
                    post.get_publish_time(),
                    post.title(),
                    post.get_fake_path(),
                )
            )
        return "\n".join(lines)

    @staticmethod
    def _truncate_intro(text: str) -> str:
        """Shorten *text* to at most ``INTRO_DESIRED_LENGTH`` characters.

        Text within the limit is returned unchanged. Longer text is cut at the
        limit; if a period lies within ``INTRO_MAX_EXTRA_LENGTH`` characters of
        the cut, the text ends at that period, otherwise "..." is appended.
        """
        if len(text) <= INTRO_DESIRED_LENGTH:
            return text

        text = text[:INTRO_DESIRED_LENGTH]  # cut to length

        # first try to cut at the last period, if it's not too far...
        last_dot_pos = text.rfind(".")
        dot_is_near = (
            last_dot_pos != -1
            and INTRO_DESIRED_LENGTH - last_dot_pos <= INTRO_MAX_EXTRA_LENGTH
        )
        if dot_is_near:
            return text[:last_dot_pos + 1]

        # If too far (or there is no period at all), just add more dots
        return text + "..."

    @staticmethod
    def _extract_first_paragraph_text(root) -> str:
        """Return the shortened plain text of the first paragraph under *root*.

        Raw text, line breaks (as a single space) and the raw text inside
        links are collected; every other inline element is skipped. Returns an
        empty string when *root* has no paragraph child.
        """
        for child in root.children:
            if not isinstance(child, marko.block.Paragraph):
                continue

            paragraph_str = ""
            for part in child.children:
                if isinstance(part, marko.inline.RawText) and isinstance(part.children, str):
                    paragraph_str += part.children
                if isinstance(part, marko.inline.LineBreak):
                    # avoid doubling the space when the text already ends in one
                    if paragraph_str[-1:] != " ":
                        paragraph_str += " "
                if isinstance(part, marko.inline.Link):
                    for part_child in part.children:
                        if (isinstance(part_child, marko.inline.RawText)
                                and isinstance(part_child.children, str)):
                            paragraph_str += part_child.children

            # return after the first paragraph
            return Post._truncate_intro(paragraph_str)

        return ""

    @staticmethod
    def _get_first_image_path(root) -> Optional[str]:
        """Return the path of the first image found depth-first under *root*.

        Returns ``None`` when no image element with a truthy path is found.
        """
        if isinstance(root, marko.inline.Image):
            return root.dest
        if isinstance(root, ExtendedImageElement):
            return root.src

        if hasattr(root, 'children'):
            for elm in root.children:
                img = Post._get_first_image_path(elm)
                if img:
                    return img

        return None


def get_posts(path: str) -> list[Post]:
    """Load every post directory under *path* and refresh the post cache.

    Entries of *path* that are not directories are skipped. The posts are
    sorted by publish date, newest first, and the module-level cache is
    replaced with the result.
    """
    return_list = []
    for directory in os.listdir(path):
        post_path = os.path.join(path, directory)
        # Stray files next to the post directories are not posts.
        if os.path.isdir(post_path):
            return_list.append(Post(post_path))

    return_list.sort(key=lambda post: post.meta_data[PUBLISH_DATE_KEY], reverse=True)

    posts_cache.clear()
    posts_cache.extend(return_list)

    return return_list