manolomartinez-greg/greg/classes.py

# Copyright (C) 2012 -- 2016  Manolo Martínez <manolo@austrohungaro.com>
#
# This file is part or Greg.
#
# Greg is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Greg is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Greg.  If not, see <http://www.gnu.org/licenses/>.
"""
This module defines the following classes:

* Session: takes into account flags passed by the command line instruction,
reads config files and data directory

* Feed: Sanitizes and organizes a particular feed and makes it available for
the subcommands

* Placeholders: Calculates and stores the values of placeholders
"""
import configparser
import os.path
import sys
import time
import json
from pkg_resources import resource_filename
from urllib.parse import urlparse
from urllib.error import URLError
from warnings import warn

import greg.aux_functions as aux

config_filename_global = resource_filename(__name__, 'data/greg.conf')


class Session():
    def __init__(self, args):
        self.args = args
        self.config_filename_user = self.retrieve_config_file()
        self.data_dir = self.retrieve_data_directory()
        self.data_filename = os.path.join(self.data_dir, "data")
        self.feeds = configparser.ConfigParser()
        self.feeds.read(self.data_filename)
        self.config = configparser.ConfigParser()
        self.config.read([config_filename_global, self.config_filename_user])

    def list_feeds(self):
        """
        Output a list of all feed names
        """
        feeds = configparser.ConfigParser()
        feeds.read(self.data_filename)
        return feeds.sections()

    def retrieve_config_file(self):
        """
        Retrieve config file
        """
        try:
            if self.args["configfile"]:
                return self.args["configfile"]
        except KeyError:
            pass
        return os.path.expanduser('~/.config/greg/greg.conf')

    def retrieve_data_directory(self):
        """
        Retrieve the data directory
        Look first into config_filename_global
        then into config_filename_user. The latter takes preeminence.
        """
        args = self.args
        try:
            if args['datadirectory']:
                aux.ensure_dir(args['datadirectory'])
                return args['datadirectory']
        except KeyError:
            pass
        config = configparser.ConfigParser()
        config.read([config_filename_global, self.config_filename_user])
        section = config.default_section
        data_path = config.get(section, 'Data directory',
                               fallback='~/.local/share/greg')
        data_path_expanded = os.path.expanduser(data_path)
        aux.ensure_dir(data_path_expanded)
        return os.path.expanduser(data_path_expanded)


class Feed():
    """
    Calculate information about the current feed
    """
    def __init__(self, session, feed, podcast):
        self.session = session
        self.args = session.args
        self.config = self.session.config
        self.name = feed
        if not podcast:
            self.podcast = aux.parse_podcast(session.feeds[feed]["url"])
        else:
            self.podcast = podcast
        self.sync_by_date = self.has_date()
        self.willtag = self.will_tag()
        if self.willtag:
            self.defaulttagdict = self.default_tag_dict()
        self.mime = self.retrieve_mime()
        self.wentwrong = False
        if self.podcast.bozo: # the bozo bit is on, see feedparser docs
            warning = str(self.podcast["bozo_exception"])
            if "URLError" in warning:
                self.wentwrong = warning
            else:
                warn("""This feed is malformed (possibly in unimportant ways):
                        {}""".format(warning), stacklevel=10)
        self.info = os.path.join(session.data_dir, feed)
        self.entrylinks, self.linkdates = aux.parse_feed_info(self.info)

    def retrieve_config(self, value, default):
        """
        Retrieves a value (with a certain fallback) from the config files
        (looks first into config_filename_global then into
        config_filename_user. The latest takes preeminence) if the command line
        flag for the value is used, that overrides everything else
        """
        args = self.args
        name = self.name
        try:
            if args[value]:
                return args[value]
        except KeyError:
            pass
        section = name if self.config.has_section(
            name) else self.config.default_section
        answer = self.config.get(section, value, fallback=default)
        return answer

    def default_tag_dict(self):
        defaultoptions = self.config.defaults()
        tags = [[option.replace(
            "tag_", ""), defaultoptions[option]] for option
            in defaultoptions if "tag_" in option]
        # these are the tags to be filled
        return dict(tags)

    def retrieve_download_path(self):
        """
        Retrieves the download path (looks first into config_filename_global
        then into the [DEFAULT], then the [feed], section of
        config_filename_user. The latest takes preeminence)
        """
        section = self.name if self.config.has_section(
            self.name) else self.config.default_section
        download_path = self.config.get(
            section, 'Download directory', fallback='~/Podcasts')
        subdirectory = self.config.get(
            section, 'Create subdirectories', fallback='no')
        return [os.path.expanduser(download_path), subdirectory]

    def has_date(self):
        podcast = self.podcast
        session = self.session
        name = self.name
        try:  # If the feed has a date, and we can parse it, we use it.
            test = podcast.feed.published_parsed
            sync_by_date = True
        except AttributeError:
            try:
                test = podcast.feed.updated_parsed
                sync_by_date = True
            except AttributeError:
                try:
                    test = podcast.entries[0].published_parsed
                    sync_by_date = True
                except (AttributeError, IndexError):
                    # Otherwise, we use download links.
                    print(("I cannot parse the time information of this feed."
                           "I'll use your current local time instead."),
                          file=sys.stderr, flush=True)
                    sync_by_date = False
        if not sync_by_date:
            session.feeds[name]["date_info"] = "not available"
            with open(session.data_filename, 'w') as configfile:
                session.feeds.write(configfile)
        else:
            try:
                if session.feeds[name]["date_info"] == "not available":
                    print(("Either this feed has changed, or greg has "
                           "improved, but we can now parse its time "
                           "information. This is good, but it also means that "
                           "(just this time) it's possible that you have "
                           "missed some entries. You might do a 'greg check "
                           "-f {}' to make sure that you're not missing out "
                           "on anything.").format(name))
            except KeyError:
                pass
            session.feeds[name]["date_info"] = "available"
            with open(session.data_filename, 'w') as configfile:
                session.feeds.write(configfile)
        return sync_by_date

    def will_tag(self):
        """
        Check whether the feed should be tagged
        """
        wanttags = self.retrieve_config('Tag', 'no')
        if wanttags == 'yes':
            if aux.eyed3exists:
                willtag = True
            else:
                willtag = False
                print(("You want me to tag {0}, but you have not installed "
                       "the EyeD3 module. I cannot honour your request.").
                      format(self.name), file=sys.stderr, flush=True)
        else:
            willtag = False
        return willtag

    def how_many(self):
        """
        Ascertain where to start downloading, and how many entries.
        """
        if self.linkdates != []:
            # What follows is a quick sanity check: if the entry date is in the
            # future, this is probably a mistake, and we just count the entry
            # date as right now.
            if max(self.linkdates) <= list(time.localtime()):
                currentdate = max(self.linkdates)
            else:
                currentdate = list(time.localtime())
                print(("This entry has its date set in the future. "
                       "I will use your current local time as its date "
                       "instead."),
                      file=sys.stderr, flush=True)
            stop = sys.maxsize
        else:
            currentdate = [1, 1, 1, 0, 0]
            firstsync = self.retrieve_config('firstsync', '1')
            if firstsync == 'all':
                stop = sys.maxsize
            else:
                stop = int(firstsync)
        return currentdate, stop

    def fix_linkdate(self, entry):
        """
        Give a date for the entry, depending on feed.sync_by_date
        Save it as feed.linkdate
        """
        if self.sync_by_date:
            try:
                entry.linkdate = list(entry.published_parsed)
                self.linkdate = list(entry.published_parsed)
            except (AttributeError, TypeError):
                try:
                    entry.linkdate = list(entry.updated_parsed)
                    self.linkdate = list(entry.updated_parsed)
                except (AttributeError, TypeError):
                    print(("This entry doesn't seem to have a parseable date. "
                           "I will use your local time instead."),
                          file=sys.stderr, flush=True)
                    entry.linkdate = list(time.localtime())
                    self.linkdate = list(time.localtime())
        else:
            entry.linkdate = list(time.localtime())

    def retrieve_mime(self):
        """
        Check the mime-type to download
        """
        mime = self.retrieve_config('mime', 'audio')
        mimedict = {"number": mime}
        # the input that parse_for_download expects
        return aux.parse_for_download(mimedict)

    def download_entry(self, entry):
        """
        Find entry link and download entry
        """
        downloadlinks = {}
        downloaded = False
        ignoreenclosures = self.retrieve_config('ignoreenclosures', 'no')
        notype = self.retrieve_config('notype', 'no')
        if ignoreenclosures == 'no':
            for enclosure in entry.enclosures:
                if notype == 'yes':
                    downloadlinks[urlparse(enclosure["href"]).path.split(
                        "/")[-1]] = enclosure["href"]
                    # preserve original name
                else:
                    try:
                        # We will download all enclosures of the desired
                        # mime-type
                        if any([mimetype in enclosure["type"] for mimetype in
                                self.mime]):
                            downloadlinks[urlparse(
                                enclosure["href"]).path.split(
                                    "/")[-1]] = enclosure["href"]
                            # preserve original name
                    except KeyError:
                        print("This podcast carries no information about "
                              "enclosure types. Try using the notype "
                              "option in your greg.conf", file=sys.stderr,
                              flush=True)
        else:
            downloadlinks[urlparse(entry.link).query.split(
                "/")[-1]] = entry.link
        for podname in downloadlinks:
            if (podname, entry.linkdate) not in zip(self.entrylinks,
                                                    self.linkdates):
                try:
                    title = entry.title
                except:
                    title = podname
                try:
                    sanitizedsummary = aux.html_to_text(entry.summary)
                    if sanitizedsummary == "":
                        sanitizedsummary = "No summary available"
                except:
                    sanitizedsummary = "No summary available"
                placeholders = Placeholders(
                    self, entry, downloadlinks[podname], podname, title,
                    sanitizedsummary)
                placeholders = aux.check_directory(placeholders)
                condition = aux.filtercond(placeholders)
                if condition:
                    print("Downloading {} -- {}".format(title, podname))
                    aux.download_handler(self, placeholders)
                    if self.willtag:
                        aux.tag(placeholders)
                    downloaded = True
                else:
                    print("Skipping {} -- {}".format(title, podname))
                    downloaded = False
                if self.info:
                    with open(self.info, 'a') as current:
                        # We write to file this often to ensure that
                        # downloaded entries count as downloaded.
                        json.dump({'entrylink': podname, 'linkdate': entry.linkdate}, current)
                        current.write('\n')
        return downloaded


class Placeholders:
    def __init__(self, feed, entry, link, filename, title, summary):
        self.feed = feed
        self.link = link
        self.filename = filename
        # self.fullpath = os.path.join(self.directory, self.filename)
        self.title = title.replace("\"", "'")
        self.filename_title = aux.sanitize(title)
        try:
            self.podcasttitle = feed.podcast.title
        except AttributeError:
            self.podcasttitle = feed.name
        try:
            self.sanitizedsubtitle = aux.html_to_text(
                feed.podcast.feed.subtitle)
            if self.sanitizedsubtitle == "":
                self.sanitizedsubtitle = "No description"
        except AttributeError:
            self.sanitizedsubtitle = "No description"
        self.entrysummary = summary
        self.filename_podcasttitle = aux.sanitize(self.podcasttitle)
        self.name = feed.name
        self.date = tuple(entry.linkdate)
        self.itunes_episode = entry.get('itunes_episode')

    def date_string(self):
        date_format = self.feed.retrieve_config("date_format", "%Y-%m-%d")
        return time.strftime(date_format, self.date)

    def substitute(self, inputstring):
        """
        Take a string with placeholders, and return the strings with substitutions.
        """
        newst = inputstring.format(link=self.link,
                                   filename=self.filename,
                                   directory=self.directory,
                                   fullpath=self.fullpath,
                                   title=self.title,
                                   filename_title=self.filename_title,
                                   date=self.date_string(),
                                   podcasttitle=self.podcasttitle,
                                   filename_podcasttitle=
                                   self.filename_podcasttitle,
                                   name=self.name,
                                   subtitle=self.sanitizedsubtitle,
                                   entrysummary=self.entrysummary,
                                   itunes_episode = self.itunes_episode)
        return newst