ArchiveBox/archivebox/pkgs/abx-spec-config/abx_spec_config/base_configset.py

340 lines
14 KiB
Python

__package__ = 'abx_spec_config'
import os
import sys
import re
from pathlib import Path
from typing import Type, Tuple, Callable, ClassVar, Dict, Any
from typing_extensions import Annotated
import toml
from rich import print
from benedict import benedict
from pydantic import model_validator, TypeAdapter, AliasChoices, AfterValidator
from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource
from pydantic_settings.sources import TomlConfigSettingsSource
import abx
from . import toml_util
# TOML table header of the section that update_in_place(persist=True) writes automatic fixes into
AUTOFIXES_HEADER = "[AUTOFIXES]"
# explanatory comment line written directly underneath AUTOFIXES_HEADER in the config file
AUTOFIXES_SUBHEADER = "# The following config was added automatically to fix problems detected at startup:"

# config keys that update_in_place() has already printed a warning about (used to avoid repeating warnings)
_ALREADY_WARNED_ABOUT_UPDATED_CONFIG = set()


def _validate_config_key(key: str) -> str:
    """Return key unchanged if it is a valid config key name (e.g. WGET_BINARY), otherwise raise ValueError."""
    if not (key.isidentifier() and key.isupper() and not key.startswith('_')):
        raise ValueError(f'Invalid config key {key!r}: must be an UPPERCASE identifier that does not start with "_"')
    return key


# NOTE: an AfterValidator must return the validated value; returning the bool result of the check
# (as a bare lambda would) replaces the key with True/False instead of validating it.
ConfigKeyStr = Annotated[str, AfterValidator(_validate_config_key)]
class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
    """
    A settings source that loads variables from a TOML file, ignoring its section structure.

    Every key found inside a top-level table is hoisted into one flat namespace
    (if the same key appears more than once, the last occurrence in the file wins),
    and keys that are not fields of the settings class are discarded.
    """

    def __init__(
        self,
        settings_cls: type[BaseSettings],
        toml_file: Path | None=None,
    ):
        # an explicitly passed path takes priority over the one configured on the settings class
        self.toml_file_path = toml_file or settings_cls.model_config.get("toml_file")
        self.nested_toml_data = self._read_files(self.toml_file_path)

        flattened = {}
        for name, entry in self.nested_toml_data.items():
            if isinstance(entry, dict):
                # a table: merge its keys directly into the flat namespace
                flattened.update(entry)
            else:
                # already a plain top-level value, keep it under its own name
                flattened[name] = entry

        # only keep keys that are declared as fields on this settings_cls
        known_fields = settings_cls.model_fields
        self.toml_data = {
            name: entry
            for name, entry in flattened.items()
            if name in known_fields
        }

        # deliberately start the super() lookup *after* TomlConfigSettingsSource, so that its own
        # __init__ (which would read the file again) is skipped and the flattened dict is used as the data
        super(TomlConfigSettingsSource, self).__init__(settings_cls, self.toml_data)
class BaseConfigSet(BaseSettings):
    """
    This is the base class for an ArchiveBox ConfigSet.
    It handles loading values from schema defaults, ArchiveBox.conf TOML config, and environment variables.

        class WgetConfig(ArchiveBoxBaseConfig):
            WGET_BINARY: str = Field(default='wget', alias='WGET_BINARY_PATH')

        c = WgetConfig()
        print(c.WGET_BINARY)                    # outputs: wget

        # you can mutate process environment variable and reload config using .__init__()
        os.environ['WGET_BINARY_PATH'] = 'wget2'
        c.__init__()

        print(c.WGET_BINARY)                    # outputs: wget2
    """

    # these pydantic config options are all VERY carefully chosen, make sure to test thoroughly before changing!!!
    model_config = SettingsConfigDict(
        validate_default=False,
        case_sensitive=True,
        extra="ignore",
        arbitrary_types_allowed=False,
        populate_by_name=True,
        from_attributes=True,
        loc_by_alias=False,
        validate_assignment=True,
        validate_return=True,
        revalidate_instances="subclass-instances",
    )

    load_from_defaults: ClassVar[bool] = True       # read from schema defaults
    load_from_system: ClassVar[bool] = True         # read from ~/.config/abx/abx.conf
    load_from_collection: ClassVar[bool] = True     # read from ./ArchiveBox.conf
    load_from_environment: ClassVar[bool] = True    # read from environment variables

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: Type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> Tuple[PydanticBaseSettingsSource, ...]:
        """
        Defines the config sources and their order:
        Schema defaults -> system config (TOML) -> ArchiveBox.conf (TOML) -> Environment variables

        Each source is only included if the matching load_from_* class flag is True
        (and, for the two config files, only if the file exists).
        dotenv_settings and file_secret_settings are never used.
        """

        def load_toml_sources(config_file) -> list:
            """Return [source] for an existing config file, converting it from INI to TOML first if needed."""
            if not os.path.isfile(config_file):
                return []
            try:
                return [FlatTomlConfigSettingsSource(settings_cls, toml_file=config_file)]
            except Exception as err:
                # matched by name so that it works regardless of which TOML library raised it
                if err.__class__.__name__ != "TOMLDecodeError":
                    raise
                # file is not valid TOML, assume it is a legacy INI config: convert it in place and retry once
                convert_ini_to_toml(config_file)
                return [FlatTomlConfigSettingsSource(settings_cls, toml_file=config_file)]

        default_configs = [init_settings] if cls.load_from_defaults else []
        environment_configs = [env_settings] if cls.load_from_environment else []

        # load system config from ~/.config/abx/abx.conf
        SYSTEM_CONFIG_FILE = abx.pm.hook.get_system_config_path()
        system_configs = load_toml_sources(SYSTEM_CONFIG_FILE) if cls.load_from_system else []

        # load collection config from ./ArchiveBox.conf
        COLLECTION_CONFIG_FILE = abx.pm.hook.get_collection_config_path()
        collection_configs = load_toml_sources(COLLECTION_CONFIG_FILE) if cls.load_from_collection else []

        # NOTE(review): pydantic-settings documents that sources appearing EARLIER in the returned tuple
        # take priority over later ones -- confirm that this order yields the intended precedence
        # (i.e. whether environment variables are really meant to rank below the config files).
        precedence_order = [
            *default_configs,
            *system_configs,
            *collection_configs,
            *environment_configs,
        ]
        return tuple(precedence_order)

    @model_validator(mode="after")
    def fill_defaults(self):
        """Populate any unset values using function provided as their default"""
        for key in type(self).model_fields:
            # a value that is still callable is an un-evaluated default function (validate_default=False)
            if callable(getattr(self, key)) and self.load_from_defaults:
                # set generated default value as final validated value
                setattr(self, key, self.get_default_value(key))
        return self

    def validate(self):
        """Manual validation method, to be called from plugin/__init__.py:get_CONFIG()"""
        pass

    def get_default_value(self, key: ConfigKeyStr):
        """
        Get the default value for a given config key.

        If the field's default is a function it is called to compute the value, receiving a dict of
        the current config when it accepts arguments. The result is checked against the field's type
        annotation (raises pydantic.ValidationError on mismatch) and returned.
        Raises KeyError if key is not a field on this config.
        """
        model_fields = type(self).model_fields
        field = model_fields[key]

        # look at the schema default rather than the current value: once fill_defaults() has run the
        # current value is never callable, and returning it here would make every value look like a default
        default = field.get_default(call_default_factory=True)
        if not callable(default):
            return default

        # default is a function, execute it to get the actual value, passing existing config as a dict arg if expected
        if func_takes_args_or_kwargs(default):
            # assemble dict of existing field values to pass to default factory functions
            config_so_far = benedict(self.model_dump(include=set(model_fields), warnings=False))
            computed_default = default(config_so_far)
        else:
            # otherwise it's a pure function with no args, just call it
            computed_default = default()

        # check to make sure default factory return value matches type annotation
        TypeAdapter(field.annotation).validate_python(computed_default)
        return computed_default

    def update_in_place(self, warn=False, persist=False, hint='', **kwargs):
        """
        Update the config with new values. Use this sparingly! We should almost never be updating config at runtime.
        Sets them in the environment so they propagate to spawned subprocesses / across future re-__init__()s and reload from environment

        Example acceptable use case: user config says SEARCH_BACKEND_ENGINE=sonic but sonic_client pip library is not installed so we cannot use it.
        SEARCH_BACKEND_CONFIG.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep') can be used to reset it back to ripgrep so we can continue.

        warn:     print a warning listing each changed value (only shown once per key)
        persist:  also write the new values to the [AUTOFIXES] section of the collection config file
        hint:     extra text appended to the warning message
        **kwargs: the KEY=value pairs to set

        Returns self after reloading the config.
        """
        COLLECTION_CONFIG_FILE = abx.pm.hook.get_collection_config_path()

        # silence warnings if they've already been shown once
        if all(key in _ALREADY_WARNED_ABOUT_UPDATED_CONFIG for key in kwargs):
            warn = False

        if warn or os.environ.get('DEBUG', '').lower() in ('true', '1', 'yes', 'on'):
            fix_scope = 'in ArchiveBox.conf' if persist else 'just for current run'
            print(f'\n[yellow]:warning: WARNING: Some config cannot be used as-is, fixing automatically {fix_scope}:[/yellow] {hint}', file=sys.stderr)

        # set the new values in the environment
        for key, value in kwargs.items():
            os.environ[key] = str(value)
            original_value = getattr(self, key)
            if warn:
                # goes to stderr like the rest of the warning output
                print(f' {key}={original_value} -> {value}', file=sys.stderr)
                _ALREADY_WARNED_ABOUT_UPDATED_CONFIG.add(key)

        # if persist=True, write config changes to data/ArchiveBox.conf [AUTOFIXES] section
        try:
            if persist and COLLECTION_CONFIG_FILE.is_file():
                autofixes_to_add = benedict(kwargs).to_toml(encoder=toml_util.CustomTOMLEncoder())

                # split the file into the user's config and everything after the [AUTOFIXES] header
                # (the header must be looked for in the full text, *before* the section is cut off)
                file_text = COLLECTION_CONFIG_FILE.read_text()
                existing_config, _, autofixes_section = file_text.partition(AUTOFIXES_HEADER)
                existing_config = existing_config.strip()
                autofixes_section = autofixes_section.replace(AUTOFIXES_SUBHEADER, '').replace(AUTOFIXES_HEADER, '')

                # keep previously persisted autofixes, except for keys that are being set again now
                # (a repeated key would make the file invalid TOML); assumes one KEY = value per line
                existing_autofixes = '\n'.join(
                    line
                    for line in autofixes_section.splitlines()
                    if line.strip() and line.split('=', 1)[0].strip() not in kwargs
                )

                new_config = '\n'.join(section for section in [
                    existing_config,
                    '\n' + AUTOFIXES_HEADER,
                    AUTOFIXES_SUBHEADER,
                    existing_autofixes,
                    autofixes_to_add,
                ] if section.strip()).strip() + '\n'
                COLLECTION_CONFIG_FILE.write_text(new_config)
        except Exception as err:
            # persisting is best-effort (the values are already applied via the environment),
            # but the failure should not go completely unnoticed
            sys.stderr.write(f'[!] Failed to persist config autofixes to {COLLECTION_CONFIG_FILE}: {err.__class__.__name__}: {err}\n')

        # reload so the new values are picked up from the environment
        self.__init__()
        if warn:
            print(file=sys.stderr)
        return self

    @property
    def aliases(self) -> Dict[ConfigKeyStr, ConfigKeyStr]:
        """Mapping of every accepted name (field names and their string validation aliases) -> field name"""
        alias_map = {}
        for key, field in type(self).model_fields.items():
            alias_map[key] = key

            validation_alias = field.validation_alias
            if validation_alias is None:
                continue

            if isinstance(validation_alias, AliasChoices):
                for alias in validation_alias.choices:
                    # only plain string aliases are usable as lookup names
                    if isinstance(alias, str):
                        alias_map[alias] = key
            elif isinstance(validation_alias, str):
                alias_map[validation_alias] = key
            else:
                raise ValueError(f'Unknown alias type for field {key}: {validation_alias}')
        return benedict(alias_map)

    @property
    def toml_section_header(self):
        """Convert the class name to a TOML section header e.g. ShellConfig -> SHELL_CONFIG"""
        class_name = self.__class__.__name__
        return re.sub(r'([A-Z]+)', r'_\1', class_name).upper().strip('_')

    def from_defaults(self) -> Dict[ConfigKeyStr, Any]:
        """Get the dictionary of {key: value} config loaded from the default values"""
        class OnlyDefaultsConfig(self.__class__):
            load_from_defaults = True
            load_from_system = False
            load_from_collection = False
            load_from_environment = False

        computed_keys = set(type(self).model_computed_fields)
        return benedict(OnlyDefaultsConfig().model_dump(exclude_unset=False, exclude_defaults=False, exclude=computed_keys))

    def from_collection(self) -> Dict[ConfigKeyStr, Any]:
        """Get the dictionary of {key: value} config loaded from the collection ArchiveBox.conf"""
        class OnlyConfigFileConfig(self.__class__):
            load_from_defaults = False
            load_from_system = False
            load_from_collection = True
            load_from_environment = False

        computed_keys = set(type(self).model_computed_fields)
        return benedict(OnlyConfigFileConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=computed_keys))

    def from_environment(self) -> Dict[ConfigKeyStr, Any]:
        """Get the dictionary of {key: value} config loaded from the environment variables"""
        class OnlyEnvironmentConfig(self.__class__):
            load_from_defaults = False
            load_from_system = False
            load_from_collection = False
            load_from_environment = True

        computed_keys = set(type(self).model_computed_fields)
        return benedict(OnlyEnvironmentConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=computed_keys))

    def from_computed(self) -> Dict[ConfigKeyStr, Any]:
        """Get the dictionary of {key: value} config loaded from the computed fields"""
        return benedict(self.model_dump(include=set(type(self).model_computed_fields)))

    def to_toml_dict(self, defaults=False) -> Dict[ConfigKeyStr, Any]:
        """
        Get the current config as a TOML-ready dict of {SECTION_HEADER: {key: value}}.
        Values equal to their default are left out unless defaults=True.
        """
        config_dict = {}
        for key, value in benedict(self).items():
            if defaults or value != self.get_default_value(key):
                config_dict[key] = value
        return benedict({self.toml_section_header: config_dict})

    def to_toml_str(self, defaults=False) -> str:
        """Get the current config as a TOML string"""
        toml_dict = self.to_toml_dict(defaults=defaults)
        if not toml_dict[self.toml_section_header]:
            # if the section is empty, don't write it
            toml_dict.pop(self.toml_section_header)
        # use the same package-local encoder as update_in_place()
        return toml.dumps(toml_dict, encoder=toml_util.CustomTOMLEncoder())
def func_takes_args_or_kwargs(lambda_func: Callable[..., Any]) -> bool:
    """
    Return True if a callable takes positional args, *args, or **kwargs, otherwise False if it's pure/argless.

    Keyword-only parameters do not count as taking args.
    Callables without a code object (e.g. functools.partial objects or instances defining __call__)
    are inspected via their signature; if no signature can be determined, False is returned.
    """
    import inspect

    code = getattr(lambda_func, '__code__', None)
    if code is not None:
        has_args = code.co_argcount > 0
        has_varargs = bool(code.co_flags & inspect.CO_VARARGS)
        has_varkw = bool(code.co_flags & inspect.CO_VARKEYWORDS)
        return has_args or has_varargs or has_varkw

    # no code object to look at, fall back to signature introspection
    try:
        parameters = inspect.signature(lambda_func).parameters.values()
    except (TypeError, ValueError):
        return False

    arg_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )
    return any(param.kind in arg_kinds for param in parameters)
def convert_ini_to_toml(ini_file: Path):
    """
    Rewrite an INI config file in place as TOML.

    The original INI text is first copied to a hidden sibling file named
    .ORIGINALNAME.bak, then the file itself is overwritten with the converted TOML.
    """
    backup_file = ini_file.parent / f'.{ini_file.name}.bak'

    ini_text = ini_file.read_text()
    # write the backup before anything else touches the original file
    backup_file.write_text(ini_text)

    ini_file.write_text(toml_util.convert(ini_text))