340 lines
14 KiB
Python
340 lines
14 KiB
Python
# Explicit package name for this module; relative imports such as
# `from . import toml_util` further down are resolved against it.
__package__ = 'abx_spec_config'
|
|
|
|
import os
|
|
import sys
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Type, Tuple, Callable, ClassVar, Dict, Any
|
|
from typing_extensions import Annotated
|
|
|
|
import toml
|
|
from rich import print
|
|
|
|
from benedict import benedict
|
|
from pydantic import model_validator, TypeAdapter, AliasChoices, AfterValidator
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource
|
|
from pydantic_settings.sources import TomlConfigSettingsSource
|
|
|
|
import abx
|
|
|
|
from . import toml_util
|
|
|
|
|
|
# TOML section header (and the explanatory comment written right below it) under which
# BaseConfigSet.update_in_place(persist=True) records automatically applied config fixes.
AUTOFIXES_HEADER = "[AUTOFIXES]"
AUTOFIXES_SUBHEADER = "# The following config was added automatically to fix problems detected at startup:"

# Config keys that update_in_place() has already printed a warning about during this process.
_ALREADY_WARNED_ABOUT_UPDATED_CONFIG = set()


def _validate_config_key(key: str) -> str:
    """
    Return key unchanged if it is a public UPPER_CASE identifier, otherwise raise ValueError.

    A pydantic "after" validator must return the validated value (or raise); returning the
    result of the boolean check would replace the key with True/False.
    """
    if not (key.isidentifier() and key.isupper() and not key.startswith('_')):
        raise ValueError(f'Invalid config key {key!r}: expected an UPPERCASE identifier not starting with "_"')
    return key


# A config key name, e.g. "WGET_BINARY"
ConfigKeyStr = Annotated[str, AfterValidator(_validate_config_key)]
|
|
|
|
|
|
class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
    """
    Settings source backed by a TOML file whose [SECTION] tables are flattened away.

    Keys inside top-level tables are hoisted to the top level, and only keys that
    are declared as fields on the settings class are kept.
    """

    def __init__(
        self,
        settings_cls: type[BaseSettings],
        toml_file: Path | None=None,
    ):
        # explicit argument wins, otherwise fall back to the path configured on the settings class
        self.toml_file_path = toml_file or settings_cls.model_config.get("toml_file")
        self.nested_toml_data = self._read_files(self.toml_file_path)

        # hoist the contents of every top-level table; plain top-level values are kept as-is.
        # later entries overwrite earlier ones that share the same key.
        flattened = {}
        for section_name, section in self.nested_toml_data.items():
            if isinstance(section, dict):
                flattened.update(section)
            else:
                flattened[section_name] = section

        # drop anything that is not a field of this particular settings class
        known_fields = settings_cls.model_fields
        self.toml_data = {
            name: setting
            for name, setting in flattened.items()
            if name in known_fields
        }

        # deliberately skip TomlConfigSettingsSource.__init__ and hand the prepared dict to its parent class
        super(TomlConfigSettingsSource, self).__init__(settings_cls, self.toml_data)
|
|
|
|
|
|
class BaseConfigSet(BaseSettings):
    """
    This is the base class for an ArchiveBox ConfigSet.
    It handles loading values from schema defaults, the system config TOML file,
    the collection ArchiveBox.conf TOML file, and environment variables
    (listed from lowest to highest priority).

        class WgetConfig(BaseConfigSet):
            WGET_BINARY: str = Field(default='wget', alias='WGET_BINARY_PATH')

        c = WgetConfig()
        print(c.WGET_BINARY)                    # outputs: wget

        # you can mutate process environment variable and reload config using .__init__()
        os.environ['WGET_BINARY_PATH'] = 'wget2'
        c.__init__()

        print(c.WGET_BINARY)                    # outputs: wget2
    """

    # these pydantic config options are all VERY carefully chosen, make sure to test thoroughly before changing!!!
    model_config = SettingsConfigDict(
        validate_default=False,
        case_sensitive=True,
        extra="ignore",
        arbitrary_types_allowed=False,
        populate_by_name=True,
        from_attributes=True,
        loc_by_alias=False,
        validate_assignment=True,
        validate_return=True,
        revalidate_instances="subclass-instances",
    )

    load_from_defaults: ClassVar[bool] = True          # read from schema defaults
    load_from_system: ClassVar[bool] = True            # read from ~/.config/abx/abx.conf
    load_from_collection: ClassVar[bool] = True        # read from ./ArchiveBox.conf
    load_from_environment: ClassVar[bool] = True       # read from environment variables

    @classmethod
    def _load_toml_source(cls, settings_cls: Type[BaseSettings], config_file) -> list:
        """Build the TOML source for config_file, converting a legacy INI file to TOML first if needed."""
        try:
            return [FlatTomlConfigSettingsSource(settings_cls, toml_file=config_file)]
        except Exception as err:
            # the decode error is matched by class name, presumably so that no
            # specific TOML library has to be imported here
            if err.__class__.__name__ != "TOMLDecodeError":
                raise
            convert_ini_to_toml(config_file)
            return [FlatTomlConfigSettingsSource(settings_cls, toml_file=config_file)]

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: Type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> Tuple[PydanticBaseSettingsSource, ...]:
        """
        Defines the config precedence order:
        Schema defaults -> system config (TOML) -> ArchiveBox.conf (TOML) -> Environment variables

        Each source overrides the ones to its left. dotenv_settings and
        file_secret_settings are accepted for API compatibility but never used.
        """
        default_configs = [init_settings] if cls.load_from_defaults else []
        system_configs = []
        collection_configs = []
        environment_configs = [env_settings] if cls.load_from_environment else []

        # load system config from ~/.config/abx/abx.conf
        SYSTEM_CONFIG_FILE = abx.pm.hook.get_system_config_path()
        if cls.load_from_system and os.path.isfile(SYSTEM_CONFIG_FILE):
            system_configs = cls._load_toml_source(settings_cls, SYSTEM_CONFIG_FILE)

        # load collection config from ./ArchiveBox.conf
        COLLECTION_CONFIG_FILE = abx.pm.hook.get_collection_config_path()
        if cls.load_from_collection and os.path.isfile(COLLECTION_CONFIG_FILE):
            collection_configs = cls._load_toml_source(settings_cls, COLLECTION_CONFIG_FILE)

        # pydantic-settings gives the FIRST source in the returned tuple the highest priority,
        # so the sources are listed from strongest to weakest. Environment variables must beat
        # the config files, otherwise update_in_place() could not override a value set in a file.
        precedence_order = [
            *default_configs,
            *environment_configs,
            *collection_configs,
            *system_configs,
        ]
        return tuple(precedence_order)

    @model_validator(mode="after")
    def fill_defaults(self):
        """Populate any unset values using function provided as their default"""
        for key in self.model_fields.keys():
            # a field that still holds a callable was not set by any source: it carries its default factory func
            if callable(getattr(self, key)):
                if self.load_from_defaults:
                    computed_default = self.get_default_value(key)
                    # set generated default value as final validated value
                    setattr(self, key, computed_default)
        return self

    def validate(self):
        """Manual validation method, to be called from plugin/__init__.py:get_CONFIG()"""
        pass

    def get_default_value(self, key: ConfigKeyStr):
        """
        Get the default value for a given config key.

        The default is taken from the field declaration, not from the value currently
        loaded, so the result can be compared against the current value. If the declared
        default is a function it is called (with the current config as a benedict when it
        accepts arguments) and its result is validated against the field annotation.
        Raises KeyError for unknown keys and pydantic.ValidationError when a computed
        default does not match the field type.
        """
        field = self.model_fields[key]
        declared_default = field.default

        if not callable(declared_default):
            # plain default or default_factory; pydantic's "undefined" marker when the field declares neither
            return field.get_default(call_default_factory=True)

        # the default is a function, execute it to get the actual value, passing existing config as a dict arg if expected
        if func_takes_args_or_kwargs(declared_default):
            # assemble dict of existing field values to pass to default factory functions
            config_so_far = benedict(self.model_dump(include=set(self.model_fields.keys()), warnings=False))
            computed_default = declared_default(config_so_far)
        else:
            # otherwise it's a pure function with no args, just call it
            computed_default = declared_default()

        # coerce/check to make sure default factory return value matches type annotation,
        # and hand back the coerced value so it compares equal to what ends up stored on the model
        return TypeAdapter(field.annotation).validate_python(computed_default)

    def update_in_place(self, warn=False, persist=False, hint='', **kwargs):
        """
        Update the config with new values. Use this sparingly! We should almost never be updating config at runtime.
        Sets them in the environment so they propagate to spawned subprocesses / across future re-__init__()s and reload from environment

        Example acceptable use case: user config says SEARCH_BACKEND_ENGINE=sonic but sonic_client pip library is not installed so we cannot use it.
        SEARCH_BACKEND_CONFIG.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep') can be used to reset it back to ripgrep so we can continue.

        warn:     print a warning describing the change (each key is only warned about once per process)
        persist:  also record the new values in the [AUTOFIXES] section of the collection config file
        hint:     extra text appended to the warning header
        kwargs:   the KEY=value pairs to apply
        Returns self after reloading.
        """
        COLLECTION_CONFIG_FILE = abx.pm.hook.get_collection_config_path()

        # silence warnings if they've already been shown once
        if all(key in _ALREADY_WARNED_ABOUT_UPDATED_CONFIG for key in kwargs):
            warn = False

        debug = os.environ.get('DEBUG', '').lower() in ('true', '1', 'yes', 'on')
        if warn or debug:
            fix_scope = 'in ArchiveBox.conf' if persist else 'just for current run'
            print(f'\n[yellow]:warning: WARNING: Some config cannot be used as-is, fixing automatically {fix_scope}:[/yellow] {hint}', file=sys.stderr)

        # set the new values in the environment
        for key, value in kwargs.items():
            os.environ[key] = str(value)
            original_value = getattr(self, key)
            if warn:
                # goes to stderr like the rest of the warning, so stdout stays clean
                print(f' {key}={original_value} -> {value}', file=sys.stderr)
                _ALREADY_WARNED_ABOUT_UPDATED_CONFIG.add(key)

        # if persist=True, write config changes to data/ArchiveBox.conf [AUTOFIXES] section
        try:
            if persist and COLLECTION_CONFIG_FILE.is_file():
                autofixes_to_add = benedict(kwargs).to_toml(encoder=toml_util.CustomTOMLEncoder())

                # everything before the header is the user's own config, everything after it is earlier autofixes
                existing_config, _, autofixes_section = COLLECTION_CONFIG_FILE.read_text().partition(AUTOFIXES_HEADER)
                existing_config = existing_config.strip()

                # keep earlier autofixes, except the header lines and any key that is being
                # overwritten now (a duplicated key would make the file invalid TOML)
                autofixes_section = autofixes_section.replace(AUTOFIXES_SUBHEADER, '').replace(AUTOFIXES_HEADER, '')
                existing_autofixes = '\n'.join(
                    line
                    for line in autofixes_section.splitlines()
                    if line.strip() and line.partition('=')[0].strip() not in kwargs
                )

                new_config = '\n'.join(line for line in [
                    existing_config,
                    '\n' + AUTOFIXES_HEADER,
                    AUTOFIXES_SUBHEADER,
                    existing_autofixes,
                    autofixes_to_add,
                ] if line.strip()).strip() + '\n'
                COLLECTION_CONFIG_FILE.write_text(new_config)
        except Exception as err:
            # persisting is best-effort: the values are already applied through os.environ,
            # so carry on, but tell anyone who asked for warnings that the file was not updated
            if warn or debug:
                print(f'[yellow] Could not save the fix to ArchiveBox.conf ({err.__class__.__name__}), it only applies to the current run[/yellow]', file=sys.stderr)

        # reload all values from the (now updated) sources
        self.__init__()
        if warn:
            print(file=sys.stderr)

        return self

    @property
    def aliases(self) -> Dict[ConfigKeyStr, ConfigKeyStr]:
        """Map every accepted name (the field name itself plus its validation aliases) to its field name"""
        alias_map = {}
        for key, field in self.model_fields.items():
            alias_map[key] = key

            validation_alias = field.validation_alias
            if validation_alias is None:
                continue

            if isinstance(validation_alias, AliasChoices):
                for alias in validation_alias.choices:
                    alias_map[alias] = key
            elif isinstance(validation_alias, str):
                # Field(alias=...) is copied into validation_alias by pydantic, so this covers plain aliases too
                alias_map[validation_alias] = key
            else:
                raise ValueError(f'Unknown alias type for field {key}: {validation_alias}')

        return benedict(alias_map)

    @property
    def toml_section_header(self):
        """Convert the class name to a TOML section header e.g. ShellConfig -> SHELL_CONFIG"""
        class_name = self.__class__.__name__
        return re.sub(r'([A-Z]+)', r'_\1', class_name).upper().strip('_')

    def from_defaults(self) -> Dict[ConfigKeyStr, Any]:
        """Get the dictionary of {key: value} config loaded from the default values"""
        class OnlyDefaultsConfig(self.__class__):
            load_from_defaults = True
            load_from_system = False
            load_from_collection = False
            load_from_environment = False
        return benedict(OnlyDefaultsConfig().model_dump(exclude_unset=False, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))

    def from_collection(self) -> Dict[ConfigKeyStr, Any]:
        """Get the dictionary of {key: value} config loaded from the collection ArchiveBox.conf"""
        class OnlyConfigFileConfig(self.__class__):
            load_from_defaults = False
            load_from_system = False
            load_from_collection = True
            load_from_environment = False
        return benedict(OnlyConfigFileConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))

    def from_environment(self) -> Dict[ConfigKeyStr, Any]:
        """Get the dictionary of {key: value} config loaded from the environment variables"""
        class OnlyEnvironmentConfig(self.__class__):
            load_from_defaults = False
            load_from_system = False
            load_from_collection = False
            load_from_environment = True
        return benedict(OnlyEnvironmentConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))

    def from_computed(self) -> Dict[ConfigKeyStr, Any]:
        """Get the dictionary of {key: value} config loaded from the computed fields"""
        return benedict(self.model_dump(include=set(self.model_computed_fields.keys())))

    def to_toml_dict(self, defaults=False) -> Dict[ConfigKeyStr, Any]:
        """
        Get the current config as a TOML-ready dict of the form {SECTION_HEADER: {key: value}}.

        Values equal to their default are left out unless defaults=True.
        """
        config_dict = {}
        for key, value in benedict(self).items():
            if defaults or value != self.get_default_value(key):
                config_dict[key] = value

        return benedict({self.toml_section_header: config_dict})

    def to_toml_str(self, defaults=False) -> str:
        """Get the current config as a TOML string (an empty section is omitted entirely)"""
        toml_dict = self.to_toml_dict(defaults=defaults)
        if not toml_dict[self.toml_section_header]:
            # if the section is empty, don't write it
            toml_dict.pop(self.toml_section_header)

        # same encoder that update_in_place() uses when it writes autofixes
        return toml.dumps(toml_dict, encoder=toml_util.CustomTOMLEncoder())
|
|
|
|
|
|
|
|
def func_takes_args_or_kwargs(lambda_func: Callable[..., Any]) -> bool:
    """
    Return True if lambda_func accepts positional arguments, *args or **kwargs, otherwise False.

    Works for any callable, not only plain functions: the already-bound self/cls of a
    bound method and arguments pre-filled by functools.partial are not counted.
    Keyword-only parameters are not counted either. Callables that cannot be inspected
    at all are reported as taking no arguments.
    """
    import inspect

    try:
        parameters = inspect.signature(lambda_func).parameters.values()
    except (TypeError, ValueError):
        # no signature available: inspect the code object directly if there is one
        code = getattr(lambda_func, '__code__', None)
        if code is None:
            return False
        has_args = code.co_argcount > 0
        has_varargs = code.co_flags & inspect.CO_VARARGS != 0
        has_varkw = code.co_flags & inspect.CO_VARKEYWORDS != 0
        return has_args or has_varargs or has_varkw

    accepting_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )
    return any(parameter.kind in accepting_kinds for parameter in parameters)
|
|
|
|
|
|
|
|
|
|
def convert_ini_to_toml(ini_file: Path | str):
    """
    Convert an INI file to a TOML file in place, saving the original to .ORIGINALNAME.bak

    ini_file may be a Path or a plain string path. The backup is written next to
    the original file. Nothing is written if the conversion itself fails.
    """
    ini_file = Path(ini_file)
    backup_file = ini_file.with_name(f'.{ini_file.name}.bak')

    original_ini = ini_file.read_text()
    # convert first, so a failed conversion leaves no backup behind
    new_toml = toml_util.convert(original_ini)

    backup_file.write_text(original_ini)
    ini_file.write_text(new_toml)
|