# ArchiveBox/archivebox/config/django.py
#
# 177 lines
# 7.4 KiB
# Python

__package__ = 'archivebox.config'
import os
import sys
from datetime import datetime, timezone
from rich.progress import Progress
from rich.console import Console
import django
from archivebox.misc import logging
from . import CONSTANTS
from .common import SHELL_CONFIG
# Export the shell display preferences as environment variables *before* the
# consoles below are constructed, so they are created under the new settings.
if not SHELL_CONFIG.USE_COLOR:
    os.environ['NO_COLOR'] = '1'
if not SHELL_CONFIG.SHOW_PROGRESS:
    os.environ['TERM'] = 'dumb'

# Recreate the rich console objects based on the config values applied above.
# STDOUT and CONSOLE are two names for the same Console instance.
CONSOLE = Console()
STDOUT = CONSOLE
STDERR = Console(stderr=True)
logging.CONSOLE = CONSOLE

# Handle + task id of the startup progress bar; only populated while
# setup_django() is running.
INITIAL_STARTUP_PROGRESS = None
INITIAL_STARTUP_PROGRESS_TASK = 0
def bump_startup_progress_bar(advance=1):
    """Advance the startup progress bar by ``advance`` steps, if one is active.

    Does nothing while ``INITIAL_STARTUP_PROGRESS`` is falsy (e.g. ``None``).
    """
    # The module globals are only read here, so no ``global`` statement is needed.
    progress = INITIAL_STARTUP_PROGRESS
    if not progress:
        return
    progress.update(INITIAL_STARTUP_PROGRESS_TASK, advance=advance)  # type: ignore
def setup_django_minimal():
    """Retired entry point: unconditionally raises.

    The previous implementation, kept here for reference only::

        sys.path.append(str(CONSTANTS.PACKAGE_DIR))
        os.environ.setdefault('ARCHIVEBOX_DATA_DIR', str(CONSTANTS.DATA_DIR))
        os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
        django.setup()

    Raises:
        Exception: always, with the message ``'dont use this anymore'``.
    """
    retired_message = 'dont use this anymore'
    raise Exception(retired_message)
# Flipped to True once setup_django() has run to completion, so that repeated
# calls become no-ops.
DJANGO_SET_UP = False


def _chown_data_dir_if_root() -> None:
    """When running as root, chown DATA_DIR and its direct children to the archivebox user."""
    import shlex

    from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission

    if not (IS_ROOT and ARCHIVEBOX_USER != 0):
        return

    owner = f'{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}'
    # Quote the path for the shell: this runs as root, and a DATA_DIR containing
    # quotes, `$` or backticks must neither break the command nor be expanded.
    data_dir = shlex.quote(str(CONSTANTS.DATA_DIR))

    with SudoPermission(uid=0):
        # running as root is a special case where it's ok to be a bit slower
        # make sure data dir is always owned by the correct user
        # (best effort: stderr is discarded and the exit status is ignored)
        os.system(f'chown {owner} {data_dir} 2>/dev/null')
        # the glob stays outside the quoted path so the shell still expands it
        os.system(f'chown {owner} {data_dir}/* 2>/dev/null')


def _print_django_setup_error(err: Exception) -> None:
    """Print an error panel and the active traceback to stderr for a failed django.setup().

    Must be called from inside the ``except`` block handling ``err``, because
    ``print_exception`` renders the exception currently being handled.
    """
    from rich.panel import Panel

    STDERR.print()
    STDERR.print(Panel(
        f'\n[red]{err.__class__.__name__}[/red]: [yellow]{err}[/yellow]\nPlease check your config and [blue]DATA_DIR[/blue] permissions.\n',
        title='\n\n[red][X] Error while trying to load database![/red]',
        subtitle='[grey53]NO WRITES CAN BE PERFORMED[/grey53]',
        expand=False,
        style='bold red',
    ))
    STDERR.print()
    STDERR.print_exception(show_locals=False)


def _log_startup_command() -> None:
    """Append a line describing this invocation (argv, timestamp, version) to settings.ERROR_LOG."""
    from django.conf import settings

    with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
        command = ' '.join(sys.argv)
        ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
        f.write(f"\n> {command}; TS={ts} VERSION={CONSTANTS.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n")


def _check_database(call_command) -> None:
    """Run the ``check_db`` checks: DATA_DIR ownership, cache table, stale connections, DB file.

    ``call_command`` is django's management ``call_command`` function, passed in
    by the caller, which has already imported it.

    Raises:
        SystemExit: with code 9 if DATA_DIR is owned by uid 0.
        AssertionError: if CONSTANTS.DATABASE_FILE does not exist.
    """
    # make sure the data dir is owned by a non-root user
    if CONSTANTS.DATA_DIR.stat().st_uid == 0:
        STDERR.print('[red][X] Error: ArchiveBox DATA_DIR cannot be owned by root![/red]')
        STDERR.print(f'    {CONSTANTS.DATA_DIR}')
        STDERR.print()
        STDERR.print('[violet]Hint:[/violet] Are you running archivebox in the right folder? (and as a non-root user?)')
        STDERR.print('    cd path/to/your/archive/data')
        STDERR.print('    archivebox [command]')
        STDERR.print()
        raise SystemExit(9)

    # Create cache table in DB if needed
    try:
        from django.core.cache import cache
        cache.get('test', None)
    except django.db.utils.OperationalError:
        call_command("createcachetable", verbosity=0)

    bump_startup_progress_bar()

    # if archivebox gets imported multiple times, we have to close
    # the sqlite3 whenever we init from scratch to avoid multiple threads
    # sharing the same connection by accident
    from django.db import connections
    for conn in connections.all():
        conn.close_if_unusable_or_obsolete()

    sql_index_path = CONSTANTS.DATABASE_FILE
    # Raised explicitly rather than via ``assert`` so the check still runs
    # under ``python -O``; the exception type is unchanged.
    if not os.access(sql_index_path, os.F_OK):
        raise AssertionError(
            f'No database file {sql_index_path} found in: {CONSTANTS.DATA_DIR} (Are you in an ArchiveBox collection directory?)')


def setup_django(check_db=False, in_memory_db=False) -> None:
    """Initialize Django for this process, showing a transient progress bar on stderr.

    Returns immediately if a previous call already completed. If
    ``django.setup()`` itself fails, the error is printed to stderr (unless a
    help/version command is being run) and the function returns *without*
    marking Django as set up.

    Args:
        check_db: when true, additionally verify that DATA_DIR is not owned by
            root, make sure the cache table exists, close unusable DB
            connections and require the database file to exist.
        in_memory_db: retired; passing a truthy value raises ``Exception``.

    Raises:
        Exception: if ``in_memory_db`` is truthy.
        SystemExit: code 9 if ``check_db`` and DATA_DIR is owned by root;
            code 2 if interrupted with Ctrl-C during setup.
        AssertionError: if ``check_db`` and the database file is missing.
    """
    global INITIAL_STARTUP_PROGRESS
    global INITIAL_STARTUP_PROGRESS_TASK
    global DJANGO_SET_UP

    if DJANGO_SET_UP:
        # raise Exception('django is already set up!')
        # TODO: figure out why CLI entrypoints with init_pending are running this twice sometimes
        return

    try:
        with Progress(transient=True, expand=True, console=STDERR) as INITIAL_STARTUP_PROGRESS:
            INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25, visible=True)

            # if running as root, chown the data dir to the archivebox user to
            # make sure it's accessible to the archivebox user
            _chown_data_dir_if_root()
            bump_startup_progress_bar()

            try:
                from django.core.management import call_command

                bump_startup_progress_bar()

                if in_memory_db:
                    # The temporary in-memory database mode (once used by
                    # commands such as oneshot) is no longer supported.
                    raise Exception('dont use this anymore')

                # Use the default sqlite3 file-based database and initialize django
                # without running migrations automatically (user runs them manually by calling init)
                try:
                    django.setup()
                except Exception as e:
                    # jump the bar to the end so it does not sit half-finished above the error
                    bump_startup_progress_bar(advance=1000)

                    is_using_meta_cmd = any(
                        ignored_subcommand in sys.argv
                        for ignored_subcommand in ('help', 'version', '--help', '--version')
                    )
                    if not is_using_meta_cmd:
                        # show error message to user only if they're not running a meta command / just trying to get help
                        _print_django_setup_error(e)
                    return

                bump_startup_progress_bar()

                # log startup message to the error log
                _log_startup_command()

                if check_db:
                    _check_database(call_command)

                bump_startup_progress_bar()

                # https://docs.pydantic.dev/logfire/integrations/django/ Logfire Debugging
                # if settings.DEBUG_LOGFIRE:
                #     from opentelemetry.instrumentation.sqlite3 import SQLite3Instrumentor
                #     SQLite3Instrumentor().instrument()
                #     import logfire
                #     logfire.configure()
                #     logfire.instrument_django(is_sql_commentor_enabled=True)
                #     logfire.info(f'Started ArchiveBox v{CONSTANTS.VERSION}', argv=sys.argv)

            except KeyboardInterrupt:
                raise SystemExit(2)
    finally:
        # Always drop the progress handle, including on the early return and on
        # errors, so later bump_startup_progress_bar() calls never touch a
        # progress bar that has already been stopped.
        INITIAL_STARTUP_PROGRESS = None
        INITIAL_STARTUP_PROGRESS_TASK = None

    DJANGO_SET_UP = True