initial commit

.gitignore  (vendored, new file, 164 lines)
@@ -0,0 +1,164 @@
.vscode

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

bild/selenium_imp/.gitignore  (vendored, new file, 4 lines)
@@ -0,0 +1,4 @@
*.csv
*.txt
*.dump
*.json

bild/selenium_imp/archive_downloader.py  (new file, 11 lines)
@@ -0,0 +1,11 @@
from bild_article_classes import ArticleCollection
import datetime


def main():
    ac = ArticleCollection(min_date=datetime.datetime(year=2022, month=1, day=1))
    # ac = ArticleCollection(min_date=datetime.datetime.now())
    ac.collect()


if __name__ == "__main__":
    main()
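A minimal usage sketch for context (the keyword names below all exist in ArticleCollection._default_args; the date range and file names are made up for illustration):

    ac = ArticleCollection(
        min_date=datetime.datetime(year=2023, month=6, day=1),
        max_date=datetime.datetime(year=2023, month=6, day=30),
        out_file='june.csv',            # overwritten, since out_file_mode defaults to 'new'
        failed_file='june_failed.txt',
        random=False,                   # walk the archive in date order instead of shuffling
    )
    ac.collect()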

bild/selenium_imp/bild_article_classes.py  (new file, 620 lines)
@@ -0,0 +1,620 @@
import contextlib
import copy
import datetime
import hashlib
import os
import pickle
import random
from itertools import islice
from typing import Union
from urllib.parse import urlparse

# plain `import dateutil` does not expose .parser or .tz; import the submodules
import dateutil.parser
import dateutil.tz
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from urllib3.exceptions import MaxRetryError
from bs4 import BeautifulSoup, Comment, NavigableString
from bs4.element import PageElement

from util import ANSICodes as AC, failhandler, link as lk


class ArticleBaseClass:
    _default_args = {
        'cache': f'{os.getcwd()}/.cache',
        'debug': False
    }

    def __init__(self, **kwargs):
        if getattr(self, '_isinit', False):
            return

        # later entries win: user kwargs override the defaults
        kwargs = dict(list(ArticleBaseClass._default_args.items()) + list(kwargs.items()))
        if diff := set(kwargs.keys()).difference(ArticleBaseClass._default_args.keys()):
            raise ValueError(f"keyword{'s' if len(diff) > 1 else ''} {', '.join(diff)} unknown. supported: {', '.join(self._default_args)}")
        self.cache = kwargs.get('cache')
        self._debug = kwargs.get('debug')

        if self.cache:
            if isinstance(self.cache, bool):
                self.cache = ArticleBaseClass._default_args['cache']
            os.makedirs(self.cache, exist_ok=True)

        # self.get_page = file_cache(cache_dir=self.cache, verbose=self._debug)(self.get_page)
        self._isinit = True

    def update_target_from_source(self, target: dict, source: dict):
        for k, v in target.items():
            if isinstance(v, dict):
                if isinstance(sk := source.get(k), dict):
                    self.update_target_from_source(v, sk)
            elif k in source:
                # only overwrite keys the caller actually supplied; the original
                # `target[k] = source.get(k)` nulled every default missing from source
                target[k] = source[k]

    def add_debug(self, target):
        if isinstance(target, dict):
            target['debug'] = self._debug
            for _, v in target.items():
                if isinstance(v, dict):
                    self.add_debug(v)

    # @file_cache(cache_dir=self.cache)
    def get_page(self, link):
        def _get_page(link):
            with self.get_session() as s:
                return s.get(link)

        if self.cache:
            # hash each link independently; the original reused one sha256 object,
            # so a cache key depended on the order of all previous calls
            fname = hashlib.sha256(link.encode()).hexdigest()
            path = f"{self.cache.rstrip('/')}/{fname}"
            try:
                with open(path, 'rb') as f:
                    # print(' -> cache hit!')
                    page = pickle.load(f)
            except FileNotFoundError:
                # print(' -> not yet in cache')
                page = _get_page(link)
                with open(path, 'wb') as f:
                    pickle.dump(page, f)
        else:
            page = _get_page(link)
        return page

    def get_session(self):
        # subclasses provide self.session and self._http_retries
        local_session = self.session or requests.Session()
        retry = Retry(connect=self._http_retries, backoff_factor=0.5)
        adapter = HTTPAdapter(max_retries=retry)
        local_session.mount('https://', adapter)
        local_session.mount('http://', adapter)
        return local_session

    def close_session(self, session=None):
        if session is None:
            if self.session is not None:
                self.session.close()
        else:
            session.close()

    ...


############
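The commented-out file_cache calls above suggest the caching in get_page was once meant to be a decorator. A minimal sketch of such a decorator, assuming the same layout get_page uses (one pickle per sha256(link) digest in cache_dir):

    import functools
    import hashlib
    import os
    import pickle

    def file_cache(cache_dir='.cache', verbose=False):
        def decorator(func):
            @functools.wraps(func)
            def wrapper(self, link):
                os.makedirs(cache_dir, exist_ok=True)
                fname = os.path.join(cache_dir, hashlib.sha256(link.encode()).hexdigest())
                try:
                    with open(fname, 'rb') as f:
                        if verbose:
                            print(' -> cache hit!')
                        return pickle.load(f)
                except FileNotFoundError:
                    if verbose:
                        print(' -> not yet in cache')
                    result = func(self, link)
                    with open(fname, 'wb') as f:
                        pickle.dump(result, f)
                    return result
            return wrapper
        return decorator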
class ArticleTitle:
    _default_args = {
        'debug': False}

    def __init__(self, title: str = '', suptitle: str = '', **kwargs) -> None:
        self._debug = kwargs.get('debug', self._default_args['debug'])

        self.title = ' '.join(title.strip().splitlines())
        self.suptitle = ' '.join(suptitle.strip().splitlines())

    def __repr__(self) -> str:
        return f'{self.title}'

    def __str__(self) -> str:
        return f'({self.suptitle}) {self.title}'

    ...


############
class ArticleDepartment:
    _default_args = {
        'max_link_departments': 5,
        'debug': False}

    def __init__(self, department: str = '', link: str = '', **kwargs) -> None:
        self._debug = kwargs.get('debug', self._default_args['debug'])
        self._max_link_departments = kwargs.get('max_link_departments', self._default_args['max_link_departments'])

        self.department = ' '.join(department.strip().splitlines())

        # get departments from split url [example.com, ressort-1, ressort-2, ...]
        self.departments_link = urlparse(link).path.split('/')[1:-1]

        # generate link string
        self._link_str = ' > '.join(self.departments_link)

        # pad to max_link_departments
        self.departments_link = (self.departments_link + self._max_link_departments * [''])[:self._max_link_departments]

    def __repr__(self) -> str:
        return f'{self.department}'

    def __str__(self) -> str:
        return f'{self.department} ({self._link_str})'

    ...


############
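To make the link-splitting concrete, a small example with a hypothetical URL (the path segments between the host and the file name become the departments):

    dep = ArticleDepartment('Inland', 'https://www.bild.de/news/inland/some-article-123.bild.html')
    print(repr(dep))             # Inland
    print(str(dep))              # Inland (news > inland)
    print(dep.departments_link)  # padded: ['news', 'inland', '', '', '']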
class ArticleMetadata:
    _default_args = {
        'department': ArticleDepartment._default_args,
        'title': ArticleTitle._default_args,
        'datetime_fmt': '%Y-%m-%d %H:%M:%S',
        'debug': False}

    def __init__(self, html: Union[PageElement, None] = None, base_url: str = 'example.com', date: Union[datetime.datetime, None] = None, **kwargs):
        self._debug = kwargs.get('debug', self._default_args['debug'])
        self._datetime_fmt = kwargs.get('datetime_fmt', self._default_args['datetime_fmt'])

        # deep-copy the nested defaults: they are shared, class-level dicts, and
        # updating them in place would leak one instance's kwargs into the next
        self._title_kwargs = copy.deepcopy(self._default_args['title'])
        if title_args := kwargs.get('title'):
            self.update_target_from_source(self._title_kwargs, title_args)
        self._add_debug(self._title_kwargs)

        self._department_kwargs = copy.deepcopy(self._default_args['department'])
        if department_args := kwargs.get('department'):
            self.update_target_from_source(self._department_kwargs, department_args)
        self._add_debug(self._department_kwargs)

        self.page = None

        self.base_url = base_url
        if html is None:
            self.create_empty()
        else:
            self.authors = None
            self.parse_html(html, date)

    def update_target_from_source(self, target: dict, source: dict):
        for k, v in target.items():
            if isinstance(v, dict):
                if isinstance(sk := source.get(k), dict):
                    self.update_target_from_source(v, sk)
            elif k in source:
                # as in ArticleBaseClass: keep defaults for keys the caller omitted
                target[k] = source[k]

    def _add_debug(self, target):
        if isinstance(target, dict):
            target['debug'] = self._debug
            for _, v in target.items():
                if isinstance(v, dict):
                    self._add_debug(v)

    def create_empty(self):
        self.link = ''
        self.time = datetime.datetime.min   # was datetime.time(); keep the type consistent with parse_html
        self.title = ArticleTitle()
        self.department = ArticleDepartment()
        self.authors = None

    def parse_html(self, html: PageElement, date: Union[datetime.datetime, None]):
        try:
            href = html.find('a', {'class': 'stage-feed-item__link'}).attrs['href']
            self.link = self.base_url + href
        except (AttributeError, KeyError):
            self.link = ''

        try:
            datestring = html.find('time').attrs['datetime']
            self.time = dateutil.parser.parse(datestring).astimezone(dateutil.tz.tzlocal())
        except (AttributeError, KeyError):
            # no <time> tag: fall back to the date passed in (the original re-parsed
            # `datestring`, which is unbound here, and bare datetime.datetime() is a TypeError)
            self.time = date if date else datetime.datetime.min

        try:
            title = html.find('span', {'class': 'stage-feed-item__headline'}).contents[0]
        except AttributeError:
            title = ''

        try:
            suptitle = html.find('span', {'class': 'stage-feed-item__kicker'}).contents[0]
        except AttributeError:
            suptitle = ''

        self.title = ArticleTitle(title, suptitle, **self._title_kwargs)

        try:
            department = html.find('span', {'class': 'stage-feed-item__channel'}).contents[0]
        except AttributeError:
            department = ''

        self.department = ArticleDepartment(department, self.link, **self._department_kwargs)

    def csv_line(self, delimiter: str = ',', quote_char: str = '"', newline=True):
        def _quote(s: str):
            return f'{quote_char}{s}{quote_char}'

        elements = [
            self.time.strftime('%Y-%m-%d') if self.time else '0000-00-00',
            self.time.strftime('%H:%M:%S') if self.time else '00:00:00',
            # self.time.strftime('%Y') if self.time else '00',
            # self.time.strftime('%m') if self.time else '00',
            # self.time.strftime('%d') if self.time else '00',
            # self.time.strftime('%H') if self.time else '00',
            # self.time.strftime('%M') if self.time else '00',
            # self.time.strftime('%S') if self.time else '00',
            _quote(self.title.title if self.title else ''),
            _quote(self.title.suptitle if self.title else ''),
            _quote(self.department.department if self.department else ''),
            *[_quote(str(dep)) for dep in (self.department.departments_link if self.department else [''] * self._department_kwargs['max_link_departments'])],
            _quote(self.link or ''),   # was `_quote(self.link) or ''`, which can never be falsy
            str(self.page.status_code) if self.page else '']

        return delimiter.join(elements) + ('\n' if newline else '')

    def __repr__(self):
        return f'{self.title.title} ({self.time.strftime(self._datetime_fmt)})'

    def __str__(self):
        return (
            f'{self.title.suptitle}\n'
            f'{self.title.title}\n'
            f'{self.department.department}\n'
            f'{self.department._link_str}\n'
            f'{self.time.strftime(self._datetime_fmt)}\n'
            f'{self.link}'
        )

    ...


############
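A quick illustration of the nested-override semantics (with the `k in source` guard above, keys the caller does not mention keep their defaults):

    meta = ArticleMetadata()     # html=None, so this builds empty metadata offline
    target = {'department': {'max_link_departments': 5, 'debug': False}}
    meta.update_target_from_source(target, {'department': {'max_link_departments': 8}})
    assert target == {'department': {'max_link_departments': 8, 'debug': False}}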
class Article(ArticleBaseClass):
    _default_args = {
        'http_retries': 3,
        'meta': ArticleMetadata._default_args,
        'debug': False,
        'full_text_exclude': [
            ('aside', {'class': 'related-topics'}),
            ('figure', {}),
            ('div', {'class': 'ad-info'}),
            ('div', {'class': 'float-container'}),
            ('a', {'class': ['text-link--external', 'text-link']}),
        ]}

    def __init__(self, *, link: Union[str, None] = None, metadata: Union[ArticleMetadata, None] = None, session=None, **kwargs):
        super().__init__()
        self._debug = kwargs.get('debug', self._default_args['debug'])
        self._http_retries = kwargs.get('http_retries', self._default_args['http_retries'])
        self._meta_kwargs = copy.deepcopy(self._default_args['meta'])   # see ArticleMetadata.__init__
        if meta_args := kwargs.get('meta'):
            self.update_target_from_source(self._meta_kwargs, meta_args)
        self.add_debug(self._meta_kwargs)
        # normalize the excludes once, so skip_element can compare attrs lists directly
        self.full_text_exclude = self._clean_exclude_list(kwargs.get('full_text_exclude', self._default_args['full_text_exclude']))

        self.session = session
        self.meta = metadata or ArticleMetadata(**self._meta_kwargs)
        self.meta.link = link or self.meta.link
        self.full_text = None

        self.parse_page(self.meta.link)

    # parsers
    def parse_page(self, link):
        self.meta.page = self.get_page(link)

        soupy_page = BeautifulSoup(self.meta.page.content, 'html.parser')

        if article := soupy_page.find('article'):
            self.parse_article(article)

        if error_page := soupy_page.find('div', {'class': 'error-page'}):
            self.parse_error_page(error_page)

    def parse_error_page(self, error_page):
        with contextlib.suppress(AttributeError):
            wrapper = error_page.find('div', {'class': 'error-page__wrapper'})
            self.full_text = self.get_fulltext(wrapper, exclude=('a',))

    def parse_article(self, article):
        with contextlib.suppress(AttributeError):
            self.meta.title.title = self.get_fulltext(article.find('span', {'class': 'document-title__headline'}))
        with contextlib.suppress(AttributeError):
            self.meta.title.suptitle = self.get_fulltext(article.find('span', {'class': 'document-title__kicker'}))

        with contextlib.suppress(AttributeError):
            if article.find('div', {'class': 'author'}):
                self.meta.authors = [self.get_fulltext(article.find('span', {'class': 'author__name'}))]
            elif article.find('div', {'class': 'authors'}):
                authors = article.find_all('div', {'class': 'article_author__details'})
                self.meta.authors = [self.get_fulltext(details) for details in authors]

        with contextlib.suppress(AttributeError, KeyError):
            if date := article.find('time', {'class': ['datetime']}):
                datestring = date.attrs['datetime']
                self.meta.time = dateutil.parser.parse(datestring).astimezone(dateutil.tz.tzlocal())

        with contextlib.suppress(AttributeError):
            body = article.find_all('div', {'class': 'article-body'})
            self.full_text = self.get_fulltext(body)

    def _clean_exclude_list(self, excludes):
        if excludes is None:
            return excludes
        excl_names = []
        excl_attrs = []
        for excl in excludes:
            if isinstance(excl, (list, tuple)):
                excl_names.append(excl[0])
                try:
                    # wrap every attribute value in a list, matching bs4's attrs['class']
                    local_attr = {
                        k: v if isinstance(v, (list, tuple)) else [v]
                        for k, v in excl[1].items()
                    }
                    excl_attrs.append(local_attr)
                except (KeyError, IndexError):
                    excl_attrs.append({})
            else:
                excl_names.append(excl)
                excl_attrs.append({})

        return list(zip(excl_names, excl_attrs))
        # return excl_names,excl_attrs

    def skip_element(self, elm, excludes):
        if isinstance(elm, Comment):
            return True
        if excludes is None:
            return False
        for excl_name, excl_attr in excludes:
            if elm.name == excl_name:
                if not excl_attr:
                    return True
                for k, v in excl_attr.items():
                    with contextlib.suppress(KeyError):
                        if elm.attrs[k] == v:
                            return True
        return False

    def get_fulltext(self, html: Union[PageElement, list], exclude: Union[list, None] = None, sep: str = ' '):
        if html is None:
            return ''

        if exclude is not None:
            exclude = self._clean_exclude_list(tuple(exclude))
        else:
            exclude = self.full_text_exclude   # already normalized in __init__

        local_elems = []
        for elm in html:
            if self.skip_element(elm, exclude):
                continue
            if isinstance(elm, NavigableString):
                local_elems.append(elm)
            elif isinstance(elm, PageElement):
                local_elems.append(self.get_fulltext(elm, exclude=exclude, sep=sep))

        return sep.join(local_elems).strip()

    # util
    def to_csv_line(self, delimiter: str = ',', quote_char: str = '"', newline=True):
        def _quote(s: str):
            return f'{quote_char}{s}{quote_char}'

        line = delimiter.join((
            self.meta.csv_line(delimiter=delimiter, quote_char=quote_char, newline=False),
            _quote(' '.join(self.full_text.splitlines())) if self.full_text else '')
        ) + ('\n' if newline else '')

        return line

    def __repr__(self):
        department = self.meta.department.department if self.meta.department else ''
        title = self.meta.title.title if self.meta.title else ''
        full_text = self.full_text or ''
        datestr = self.meta.time.strftime('%d.%m.%Y %H:%M:%S') if self.meta.time else ''
        # islice() over a string returns an iterator object, not a prefix
        return f'[{department}] {title} ({datestr}): {full_text[:100]}...'

    def __str__(self) -> str:
        return (
            f"{self.meta.title.suptitle if self.meta.title else ''}\n"
            f"{self.meta.title.title if self.meta.title else ''}\n"
            f"{self.meta.department.department if self.meta.department else ''}\n"
            f"{self.meta.department._link_str if self.meta.department else ''}\n"
            f"{self.meta.time.strftime('%d.%m.%Y %H:%M:%S') if self.meta.time else ''}\n"
            f"{self.meta.link or ''} [{self.meta.page.status_code if self.meta.page else ''}]\n"
            f"{self.full_text or ''}\n"
        )
    ...


############
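An offline sketch of the exclude machinery (bypassing __init__, which would immediately fetch self.meta.link over HTTP; note skip_element compares the full class list, so the anchor must carry both classes to match the default entry):

    art = Article.__new__(Article)
    art.full_text_exclude = art._clean_exclude_list(Article._default_args['full_text_exclude'])
    soup = BeautifulSoup(
        '<div>Take this. <figure>not this</figure>'
        '<a class="text-link--external text-link">nor this</a> And this.</div>',
        'html.parser')
    print(art.get_fulltext(soup))   # the figure and the external link are dropped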
class ArticleCollection(ArticleBaseClass):
    _default_args = {
        'min_date': datetime.datetime(year=2006, month=1, day=6),
        # note: evaluated once, at import time; pass max_date explicitly in long-running processes
        'max_date': datetime.datetime.now(),
        'random': True,
        'out_file': 'out.csv',
        'out_file_mode': 'new',
        'out_file_header': 'date,time,title,suptitle,department,[link_departments],link,http status code,full text',
        'failed_file': 'failed.txt',
        'http_retries': 5,
        'retries': 2,
        'base_link': 'https://www.bild.de/themen/uebersicht/archiv/archiv-82532020.bild.html?archiveDate=',
        'link_time_format': '%Y-%m-%d',
        'article_args': Article._default_args,
        'debug': False,
    }

    _file_modes_overwrite = ('new', 'overwrite', 'write', 'w')
    _file_modes_append = ('append', 'a')
    _file_modes = (*_file_modes_overwrite, *_file_modes_append)

    def __init__(self, session: Union[requests.Session, None] = None, **kwargs):
        self._debug = kwargs.get('debug', self._default_args['debug'])
        super().__init__(debug=self._debug)

        self._min_date = kwargs.get('min_date', self._default_args['min_date'])
        self._max_date = kwargs.get('max_date', self._default_args['max_date'])
        self._max_date = self._max_date.date()
        self._min_date = self._min_date.date()

        self._random = kwargs.get('random', self._default_args['random'])

        self._article_args = copy.deepcopy(self._default_args['article_args'])   # see ArticleMetadata.__init__
        if article_args := kwargs.get('article_args'):
            self.update_target_from_source(self._article_args, article_args)
        self.add_debug(self._article_args)

        self._out_file = kwargs.get('out_file', self._default_args['out_file'])
        self._out_file_mode = kwargs.get('out_file_mode', self._default_args['out_file_mode'])
        if self._out_file_mode not in self._file_modes:
            raise AttributeError(f"file mode {self._out_file_mode} unknown. supported: [{','.join(self._file_modes)}]")

        self._out_file_header = kwargs.get('out_file_header', self._default_args['out_file_header'])
        max_link_departments = self._article_args.get('meta', {}).get('department', {}).get('max_link_departments', self._default_args['article_args']['meta']['department']['max_link_departments'])
        link_dep_strings = [f'department from link {i}' for i in range(max_link_departments)]
        self._out_file_header = self._out_file_header.replace('[link_departments]', ','.join(link_dep_strings))

        self._failed_file = kwargs.get('failed_file', self._default_args['failed_file'])
        self._http_retries = kwargs.get('http_retries', self._default_args['http_retries'])
        self._retries = kwargs.get('retries', self._default_args['retries'])
        self._base_link = kwargs.get('base_link', self._default_args['base_link'])
        self._link_time_format = kwargs.get('link_time_format', self._default_args['link_time_format'])

        self.prepare_dates()

        self.prepare_files()

        self.articles = []
        self.article_metas = []
        self.session = session

        # wrap the inherited get_page so that a failed fetch is logged to failed_file
        # (via the callback) instead of aborting the whole collection run
        self.get_page = failhandler(callback=self.write_failed_to_file)(
            lambda link: ArticleBaseClass.get_page(self, link))

    def prepare_dates(self):
        self.dates = [self._max_date - datetime.timedelta(days=x) for x in range((self._max_date - self._min_date).days + 1)]
        if self._random:
            random.shuffle(self.dates)

    def collect(self):
        self.session = self.get_session()

        print(f'Collecting article metadata from archive pages for {len(self.dates)} days')
        for i, date in enumerate(self.dates):
            link = self.build_archive_link(date)
            self.print_date(date, link, prefix=f'Date {i+1:>{len(str(len(self.dates)))}}/{len(self.dates)} ')
            self.process_archive_page(link)

        print()
        print(f'Collecting fulltext for {len(self.article_metas)} articles')
        self.get_fulltexts()

        self.close_session()

    def build_archive_link(self, date):
        return f'{self._base_link}{date.strftime(self._link_time_format)}'

    def print_date(self, date: datetime.date, link: str = None, fmt: str = None, prefix: str = None, suffix: str = None):
        if fmt is None:
            fmt = self._link_time_format
        print(f"{prefix or ''}{AC.UNDERLINE}{lk(link, date.strftime(fmt)) if link else date.strftime(fmt)}{AC.DEFAULT}{suffix or ''}")

    def prepare_files(self):
        if self._out_file_mode in self._file_modes:
            if self._out_file_mode in self._file_modes_overwrite and self._out_file:
                with open(self._out_file, 'w') as f:
                    f.write(self._out_file_header.strip() + '\n')
            elif self._out_file_mode in self._file_modes_append and self._out_file:
                ...
        else:
            raise ValueError(f"file mode '{self._out_file_mode}' not supported. supported: {self._file_modes}")
        if self._failed_file:
            with open(self._failed_file, 'w') as f:
                f.write('')

    def process_archive_page(self, link):
        page = self.get_page(link)
        if page is None:   # failhandler logged the failure and returned None
            return
        soupy_page = BeautifulSoup(page.content, 'html.parser')
        articles_html = soupy_page.find_all("article", {"class": "stage-feed-item"})
        slice_args = (None, 3, None) if self._debug else (None, None, 1)

        for article_html in islice(articles_html, *slice_args):  # debugging: first 3 only
            article_metadata = ArticleMetadata(article_html, 'https://www.bild.de', **self._article_args.get('meta', {}))
            self.print_article_metadata(article_metadata)
            # save metadata
            self.article_metas.append(article_metadata)

    def get_fulltexts(self):
        if self._random:
            random.shuffle(self.article_metas)
        for i, article_metadata in enumerate(self.article_metas):
            self.print_article_metadata(article_metadata, prefix=f'{i+1:>{len(str(len(self.article_metas)))}}/{len(self.article_metas)} ')
            self.process_article_from_meta(article_metadata)

    def process_article_from_meta(self, article_metadata):
        try:
            art = Article(metadata=article_metadata, session=self.session, **self._article_args)
            self.articles.append(art)
            if self._out_file:
                with open(self._out_file, 'a') as f:
                    f.write(art.to_csv_line())
        except (MaxRetryError, requests.exceptions.ConnectionError) as e:
            # the built-in ConnectionError is not a base class of requests'
            # ConnectionError, so the original `except (MaxRetryError, ConnectionError)` missed it
            if self._debug:
                print(e)
            self.write_failed_to_file(e, article_metadata)

    def print_article_metadata(self, metadata, *, date_fmt=None, time_fmt=None, prefix: str = None, suffix: str = None):
        if date_fmt is None:
            date_fmt = self._link_time_format
        if time_fmt is None:
            time_fmt = '%H:%M:%S'
        datetime_fmt = f'{date_fmt} {time_fmt}'

        link = metadata.link

        timestr = (
            AC.FG_BRIGHT_GREY +
            metadata.time.strftime(datetime_fmt)
            + AC.DEFAULT
        ) if metadata.time else ''

        suptitle = (
            AC.FG_BLUE +
            metadata.title.suptitle
            + AC.DEFAULT
        ) if metadata.title.suptitle else ''

        title = (
            AC.STYLE_BOLD + AC.FG_BRIGHT_PURPLE +
            metadata.title.title
            + AC.DEFAULT
        ) if metadata.title.title else ''

        error_string = (
            AC.STYLE_BOLD + AC.FG_BRIGHT_RED +
            f'[{metadata.page.status_code}]'
            + AC.DEFAULT
        ) if (metadata.page and metadata.page.status_code != 200) else ''

        print(f"{prefix or ''}{timestr} {error_string}({suptitle}) {lk(link, title) if link else title}{suffix or ''}")

    def write_failed_to_file(self, exception, elem):
        with open(self._failed_file, 'a') as f:
            if isinstance(elem, ArticleMetadata):
                f.write(f'{elem.link}, "{exception}"\n')
            elif isinstance(elem, str):
                f.write(f'{elem}, "{exception}"\n')
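Reading the collector's output back is straightforward with the csv module, since the column names follow out_file_header (with [link_departments] expanded to max_link_departments columns). One caveat: csv_line wraps fields in quote_char but does not escape quotes already inside a title, so this only round-trips cleanly for quote-free values:

    import csv

    with open('out.csv', newline='') as f:
        for row in csv.DictReader(f):
            print(row['date'], row['http status code'], row['title'])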

bild/selenium_imp/scratch.py  (new file, 88 lines)
@@ -0,0 +1,88 @@
import itertools


def print_format_table():
    """
    prints table of formatted text format options
    """
    # SGR arguments:
    # n Name Note
    # 0 Reset or normal All attributes become turned off
    # 1 Bold or increased intensity As with faint, the color change is a PC (SCO / CGA) invention.[22][better source needed]
    # 2 Faint, decreased intensity, or dim May be implemented as a light font weight like bold.[23]
    # 3 Italic Not widely supported. Sometimes treated as inverse or blink.[22]
    # 4 Underline Style extensions exist for Kitty, VTE, mintty, iTerm2 and Konsole.[24][25][26]
    # 5 Slow blink Sets blinking to less than 150 times per minute
    # 6 Rapid blink MS-DOS ANSI.SYS, 150+ per minute; not widely supported
    # 7 Reverse video or invert Swap foreground and background colors; inconsistent emulation[27][dubious – discuss]
    # 8 Conceal or hide Not widely supported.
    # 9 Crossed-out, or strike Characters legible but marked as if for deletion. Not supported in Terminal.app.
    # 10 Primary (default) font
    # 11–19 Alternative font Select alternative font n − 10
    # 20 Fraktur (Gothic) Rarely supported
    # 21 Doubly underlined; or: not bold Double-underline per ECMA-48,[5]: 8.3.117 but instead disables bold intensity on several terminals, including in the Linux kernel's console before version 4.17.[28]
    # 22 Normal intensity Neither bold nor faint; color changes where intensity is implemented as such.
    # 23 Neither italic, nor blackletter
    # 24 Not underlined Neither singly nor doubly underlined
    # 25 Not blinking Turn blinking off
    # 26 Proportional spacing ITU T.61 and T.416, not known to be used on terminals
    # 27 Not reversed
    # 28 Reveal Not concealed
    # 29 Not crossed out
    # 30–37 Set foreground color
    # 38 Set foreground color Next arguments are 5;n or 2;r;g;b
    # 39 Default foreground color Implementation defined (according to standard)
    # 40–47 Set background color
    # 48 Set background color Next arguments are 5;n or 2;r;g;b
    # 49 Default background color Implementation defined (according to standard)
    # 50 Disable proportional spacing T.61 and T.416
    # 51 Framed Implemented as "emoji variation selector" in mintty.[29]
    # 52 Encircled
    # 53 Overlined Not supported in Terminal.app
    # 54 Neither framed nor encircled
    # 55 Not overlined
    # 58 Set underline color Not in standard; implemented in Kitty, VTE, mintty, and iTerm2.[24][25] Next arguments are 5;n or 2;r;g;b.
    # 59 Default underline color Not in standard; implemented in Kitty, VTE, mintty, and iTerm2.[24][25]
    # 60 Ideogram underline or right side line Rarely supported
    # 61 Ideogram double underline, or double line on the right side
    # 62 Ideogram overline or left side line
    # 63 Ideogram double overline, or double line on the left side
    # 64 Ideogram stress marking
    # 65 No ideogram attributes Reset the effects of all of 60–64
    # 73 Superscript Implemented only in mintty[29]
    # 74 Subscript
    # 75 Neither superscript nor subscript
    # 90–97 Set bright foreground color Not in standard; originally implemented by aixterm[13]
    # 100–107 Set bright background color

    print(''.join(
        f'\x1b[0;{command}m\\x1b[{command}m\x1b[m'
        + ' ' * (3 - len(str(command)))
        + (' ' if (command + 1) % 18 else '\n')
        for command in range(108)
    ))

    supported = (0,  # reset
                 1,  # bold
                 2,  # dim
                 22,  # normal intensity
                 3,  # italic
                 23,  # ? neither italic nor blackletter
                 53,  # overlined
                 55,  # not overlined
                 4,  # underline
                 21,  # dunderline
                 24,  # ? not underlined
                 9,  # strike
                 29,  # not strike
                 7,  # invert
                 27,  # not inverted
                 8,  # hidden
                 28,  # not hidden
                 30, 31, 32, 33, 34, 35, 36, 37,  # fg color
                 38,  # fg color 38;5;n or 38;2;r;g;b
                 39,  # reset fg color
                 40, 41, 42, 43, 44, 45, 46, 47,  # bg color
                 48,  # bg color 48;5;n or 48;2;r;g;b
                 49,  # reset bg color
                 90, 91, 92, 93, 94, 95, 96, 97,
                 100, 101, 102, 103, 104, 105, 106, 107
                 )


print_format_table()
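The 6 × 6 × 6 cube from the table above (16 + 36 × r + 6 × g + b) can be rendered in a few lines, e.g. as a background-color swatch per cell:

    for r in range(6):
        row = ''.join(f'\x1b[48;5;{16 + 36 * r + 6 * g + b}m  ' for g in range(6) for b in range(6))
        print(row + '\x1b[0m')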

bild/selenium_imp/util.py  (new file, 120 lines)
@@ -0,0 +1,120 @@
from functools import wraps
from typing import Union   # used by failhandler below; missing in the original
import random


def link(uri, label=None):
    if label is None:
        label = uri
    parameters = ''

    # OSC 8 ; params ; URI ST <name> OSC 8 ;; ST
    escape_mask = '\033]8;{};{}\033\\{}\033]8;;\033\\'

    return escape_mask.format(parameters, uri, label)

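For example (terminals without OSC 8 support just show the label text):

    print(link('https://www.bild.de', 'BILD'))   # clickable "BILD" in supporting terminals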
class ANSICodes:
    DEFAULT = OFF = '\x1b[0m'
    BOLD = STYLE_BOLD = '\x1b[1m'
    DIM = STYLE_DIM = '\x1b[2m'
    REGULAR = STYLE_REGULAR = '\x1b[22m'
    ITALIC = STYLE_ITALIC = '\x1b[3m'
    NITALIC = STYLE_NITALIC = '\x1b[23m'
    UNDERLINE = DECO_UNDERLINE = '\x1b[4m'
    DUNDERLINE = DECO_DUNDERLINE = '\x1b[21m'
    NUNDERLINE = DECO_NUNDERLINE = '\x1b[24m'
    OVERLINE = DECO_OVERLINE = '\x1b[53m'
    NOVERLINE = DECO_NOVERLINE = '\x1b[55m'
    INVERT = DECO_INVERT = '\x1b[7m'
    NINVERT = DECO_NINVERT = '\x1b[27m'
    HIDDEN = DECO_HIDDEN = '\x1b[8m'
    NHIDDEN = DECO_NHIDDEN = '\x1b[28m'
    STRIKE = DECO_STRIKE = '\x1b[9m'
    NSTRIKE = DECO_NSTRIKE = '\x1b[29m'

    GREY = FG_GREY = '\x1b[30m'
    RED = FG_RED = '\x1b[31m'
    GREEN = FG_GREEN = '\x1b[32m'
    YELLOW = FG_YELLOW = '\x1b[33m'
    PURPLE = FG_PURPLE = '\x1b[34m'
    PINK = FG_PINK = '\x1b[35m'
    BLUE = FG_BLUE = '\x1b[36m'
    WHITE = FG_WHITE = '\x1b[37m'
    BRIGHT_GREY = FG_BRIGHT_GREY = '\x1b[90m'
    BRIGHT_RED = FG_BRIGHT_RED = '\x1b[91m'
    BRIGHT_GREEN = FG_BRIGHT_GREEN = '\x1b[92m'
    BRIGHT_YELLOW = FG_BRIGHT_YELLOW = '\x1b[93m'
    BRIGHT_PURPLE = FG_BRIGHT_PURPLE = '\x1b[94m'
    BRIGHT_PINK = FG_BRIGHT_PINK = '\x1b[95m'
    BRIGHT_BLUE = FG_BRIGHT_BLUE = '\x1b[96m'
    BRIGHT_WHITE = FG_BRIGHT_WHITE = '\x1b[97m'

    BG_GREY = '\x1b[40m'
    BG_RED = '\x1b[41m'
    BG_GREEN = '\x1b[42m'
    BG_YELLOW = '\x1b[43m'
    BG_PURPLE = '\x1b[44m'
    BG_PINK = '\x1b[45m'
    BG_BLUE = '\x1b[46m'
    BG_WHITE = '\x1b[47m'
    BG_BRIGHT_GREY = '\x1b[100m'   # was a duplicate BG_BRIGHT_BLUE; SGR 100 is the bright counterpart of 40
    BG_BRIGHT_RED = '\x1b[101m'
    BG_BRIGHT_GREEN = '\x1b[102m'
    BG_BRIGHT_YELLOW = '\x1b[103m'
    BG_BRIGHT_PURPLE = '\x1b[104m'
    BG_BRIGHT_PINK = '\x1b[105m'
    BG_BRIGHT_BLUE = '\x1b[106m'
    BG_BRIGHT_WHITE = '\x1b[107m'

    @staticmethod
    def FG_CUSTOM_N(n, /):
        #   0-  7: standard colors (as in ESC [ 30–37 m)
        #   8- 15: high intensity colors (as in ESC [ 90–97 m)
        #  16-231: 6 × 6 × 6 cube (216 colors): 16 + 36 × r + 6 × g + b (0 ≤ r, g, b ≤ 5)
        # 232-255: grayscale from dark to light in 24 steps
        return f'\x1b[38;5;{n}m'

    @staticmethod
    def FG_CUSTOM_RGB(r, g, b, /):
        # r, g, b: 0-255; truecolor needs the `2` introducer (38;2;r;g;b),
        # not `5`, which expects a single palette index
        return f'\x1b[38;2;{r};{g};{b}m'

    @staticmethod
    def BG_CUSTOM_N(n, /):
        #   0-  7: standard colors (as in ESC [ 30–37 m)
        #   8- 15: high intensity colors (as in ESC [ 90–97 m)
        #  16-231: 6 × 6 × 6 cube (216 colors): 16 + 36 × r + 6 × g + b (0 ≤ r, g, b ≤ 5)
        # 232-255: grayscale from dark to light in 24 steps
        return f'\x1b[48;5;{n}m'

    @staticmethod
    def BG_CUSTOM_RGB(r, g, b, /):
        # r, g, b: 0-255; same fix as FG_CUSTOM_RGB (48;2;r;g;b)
        return f'\x1b[48;2;{r};{g};{b}m'


def debugging_rand(chance):
    class RandomException(Exception):
        def __init__(self, *args):
            super().__init__(*args)

    if chance > 1:
        chance /= 100

    a = random.random()
    if a <= chance:
        raise RandomException(f'RandomException {chance*100}%')


def failhandler(callback, exceptions: Union[tuple, list, Exception, None] = None):
    if exceptions is None:
        exceptions = Exception

    def fail_decorator(func):
        @wraps(func)
        def wrapped_function(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except exceptions as e:
                callback(e, *args, **kwargs)
        return wrapped_function
    return fail_decorator
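This is the hook ArticleCollection uses to shield get_page: the callback receives the exception followed by the original call arguments, and the wrapped function returns None on failure. A minimal sketch with made-up names (log_failure, flaky_fetch):

    def log_failure(exc, url):
        print(f'failed: {url} ({exc})')

    @failhandler(callback=log_failure, exceptions=(ValueError,))
    def flaky_fetch(url):
        raise ValueError('boom')

    flaky_fetch('https://example.com')   # logs the failure and returns None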