Source code for anime_downloader.sites.anime

"""
anime.py contains the base classes required for other anime classes.
"""
import os
import logging
import copy
import importlib

from anime_downloader.sites.exceptions import AnimeDLError, NotFoundError
from anime_downloader import util
from anime_downloader.config import Config
from anime_downloader.extractors import get_extractor
from anime_downloader.downloader import get_downloader

logger = logging.getLogger(__name__)


[docs]class Anime: """ Base class for all anime classes. Parameters ---------- url: string URL of the anime. quality: One of ['360p', '480p', '540p', '720p', '1080p'] Quality of episodes fallback_qualities: list The order of fallback. Attributes ---------- sitename: str name of the site title: str Title of the anime meta: dict metadata about the anime. [Can be empty] QUALITIES: list Possible qualities for the site """ sitename = '' title = '' meta = dict() subclasses = {} subbed = None QUALITIES = ['360p', '480p', '540p', '720p', '1080p']
[docs] @classmethod def search(cls, query): """ Search searches for the anime using the query given. Parameters ---------- query: str query is the query keyword to be searched. Returns ------- list List of :py:class:`~anime_downloader.sites.anime.SearchResult` """ return
def __init__(self, url=None, quality='720p', fallback_qualities=None, _skip_online_data=False, subbed=None): self.url = url self.subbed = subbed if fallback_qualities is None: fallback_qualities = ['720p', '480p', '360p'] self._fallback_qualities = [ q for q in fallback_qualities if q in self.QUALITIES] if quality in self.QUALITIES: self.quality = quality else: raise AnimeDLError( 'Quality {0} not found in {1}'.format(quality, self.QUALITIES)) if not _skip_online_data: logger.info('Extracting episode info from page') self._episode_urls = self.get_data() self._len = len(self._episode_urls) @classmethod def verify_url(cls, url): if cls.sitename in url: return True return False @property def config(self): return Config['siteconfig'][self.sitename] def __init_subclass__(cls, sitename, **kwargs): super().__init_subclass__(**kwargs) cls.subclasses[sitename] = cls @classmethod def factory(cls, sitename: str): """ factory returns the appropriate subclass for the given site name. Parameters ---------- sitename: str sitename is the name of the site Returns ------- subclass of :py:class:`Anime` Sub class of :py:class:`Anime` """ return cls.subclasses[sitename] @classmethod def new_anime(cls, sitename: str): """ new_anime is a factory which returns the anime class corresposing to `sitename` Returns ------- subclass of Anime """ module = importlib.import_module( 'anime_downloader.sites.{}'.format(sitename) ) for c in dir(module): if issubclass(c, cls): return c raise ImportError("Cannot find subclass of {}".format(cls))
[docs] def get_data(self): """ get_data is called inside the :code:`__init__` of :py:class:`~anime_downloader.sites.anime.BaseAnime`. It is used to get the necessary data about the anime and it's episodes. This function calls :py:class:`~anime_downloader.sites.anime.BaseAnime._scrape_episodes` and :py:class:`~anime_downloader.sites.anime.BaseAnime._scrape_metadata` TODO: Refactor this so that classes which need not be soupified don't have to overload this function. Returns ------- list A list of tuples of episodes containing episode name and episode url. Ex:: [('1', 'https://9anime.is/.../...', ...)] """ self._episode_urls = [] try: self._scrape_metadata() except Exception as e: logger.debug('Metadata scraping error: {}'.format(e)) self._episode_urls = self._scrape_episodes() self._len = len(self._episode_urls) logger.debug('EPISODE IDS: length: {}, ids: {}'.format( self._len, self._episode_urls)) if not isinstance(self._episode_urls[0], tuple): self._episode_urls = [(no + 1, id) for no, id in enumerate(self._episode_urls)] return self._episode_urls
def __getitem__(self, index): episode_class = AnimeEpisode.subclasses[self.sitename] if isinstance(index, int): try: ep_id = self._episode_urls[index] except IndexError as e: raise RuntimeError("No episode found with index") from e return episode_class(ep_id[1], parent=self, ep_no=ep_id[0]) elif isinstance(index, slice): anime = copy.deepcopy(self) try: anime._episode_urls = anime._episode_urls[index] except IndexError as e: raise RuntimeError("No episode found with index") from e return anime return None def __iter__(self): episode_class = AnimeEpisode.subclasses[self.sitename] for ep_id in self._episode_urls: yield episode_class(ep_id[1], parent=self, ep_no=ep_id[0]) def __repr__(self): return ''' Site: {name} Anime: {title} Episode count: {length} '''.format(name=self.sitename, title=self.title, length=len(self)) def __len__(self): return self._len def __str__(self): return self.title def _scarpe_episodes(self): """ _scarpe_episodes is function which has to be overridden by the base classes to scrape the episode urls from the web page. Parameters ---------- soup: `bs4.BeautifulSoup` soup is the html of the anime url after passing through BeautifulSoup. Returns ------- :code:`list` of :code:`str` A list of episode urls. """ return
[docs] def _scrape_metadata(self): """ _scrape_metadata is function which has to be overridden by the base classes to scrape the metadata of anime from the web page. Parameters ---------- soup: :py:class:`bs4.BeautifulSoup` soup is the html of the anime url after passing through BeautifulSoup. """ return
class AnimeEpisode:
    """
    Base class for all Episode classes.

    Subclasses register themselves by passing ``sitename`` as a class
    keyword argument and must implement :py:meth:`_get_sources`.
    Constructing an episode immediately fetches its sources; when the
    parent's quality is not found, the parent's fallback qualities are
    tried in order.

    Parameters
    ----------
    url: string
        URL of the episode.
    parent: Anime
        The anime this episode belongs to. Quality, available qualities,
        title and fallback qualities are read from it, so it is required
        in practice even though it defaults to ``None``.
    ep_no: string
        Episode number/title of the episode

    Attributes
    ----------
    sitename: str
        name of the site
    title: str
        Title of the anime
    ep_no: string
        Episode number/title of the episode
    pretty_title: string
        Pretty title of episode in format <animename>-<ep_no>
    headers: dict
        Headers the downloader should use, used to bypass downloading
        restrictions.
    """
    QUALITIES = []
    title = ''
    stream_url = ''
    # Registry of site name -> AnimeEpisode subclass.
    subclasses = {}

    def __init__(self, url, parent: 'Anime' = None, ep_no=None):
        self.ep_no = ep_no
        self.url = url
        self.quality = parent.quality
        self.QUALITIES = parent.QUALITIES
        self._parent = parent
        self._sources = None
        self.headers = {}
        self.pretty_title = '{}-{}'.format(self._parent.title, self.ep_no)

        logger.debug("Extracting stream info of id: {}".format(self.url))

        def try_data():
            self.get_data()
            # Just to verify the source is acquired
            self.source().stream_url

        try:
            try_data()
        except NotFoundError:
            # Issue #28
            qualities = copy.copy(self._parent._fallback_qualities)
            try:
                # The quality that just failed need not be retried.
                qualities.remove(self.quality)
            except ValueError:
                pass
            for quality in qualities:
                logger.warning('Quality {} not found. Trying {}.'.format(
                    self.quality, quality))
                self.quality = quality
                try:
                    try_data()
                    return
                except NotFoundError:
                    pass
            logger.warning(f'Skipping episode: {self.ep_no}')

    def __init_subclass__(cls, sitename: str, **kwargs):
        super().__init_subclass__(**kwargs)
        cls.subclasses[sitename] = cls
        cls.sitename = sitename

    @classmethod
    def factory(cls, sitename: str):
        """Return the episode subclass registered under ``sitename``."""
        return cls.subclasses[sitename]

    @property
    def config(self):
        """Site specific section of the configuration."""
        return Config['siteconfig'][self.sitename]

    def source(self, index=0):
        """
        Get the source for episode

        Sources are fetched with :py:meth:`get_data` when none are loaded.
        An ``(extractor name, url)`` entry is turned into an extractor
        which then replaces the entry, so later calls return the same
        object.

        Returns
        -------
        `anime_downloader.extractors.base_extractor.BaseExtractor`
            Extractor depending on the source.

        Raises
        ------
        NotFoundError
            If there are no sources or ``index`` is out of range.
        """
        if not self._sources:
            self.get_data()
        if not self._sources:
            # Covers None as well as empty containers.
            raise NotFoundError("No episode sources found.")

        try:
            entry = self._sources[index]
        except IndexError as err:
            raise NotFoundError("No episode sources found.") from err

        try:
            sitename, url = entry
        except TypeError:
            # Not unpackable: the entry was already replaced by an
            # extractor on a previous call.
            return entry

        ext = get_extractor(sitename)(
            url, quality=self.quality, headers=self.headers)
        self._sources[index] = ext
        return ext

    def get_data(self):
        """Load the episode sources through :py:meth:`_get_sources`."""
        self._sources = self._get_sources()
        logger.debug('Sources : {}'.format(self._sources))

    def _get_sources(self):
        """Return the sources of the episode; subclasses must override."""
        raise NotImplementedError

    def sort_sources(self, data):
        """
        Pick the best source out of ``data`` according to the preferred
        language and the preferred servers.

        Formatted data should look something like this

        [
            {'extractor': 'mp4upload', 'url': 'https://twist.moe/mp4upload/...', 'server': 'mp4upload', 'version': 'subbed'},
            {'extractor': 'vidstream', 'url': 'https://twist.moe/vidstream/...', 'server': 'vidstream', 'version': 'dubbed'},
            {'extractor': 'no_extractor', 'url': 'https://twist.moe/anime/...', 'server': 'default', 'version': 'subbed'}
        ]

        extractor = the extractor the link should be passed to
        url = url to be passed to the extractor
        server = the server name used in config
        version = subbed/dubbed

        The config should consist of a list with servers in preferred
        order and a preferred language, eg

        "servers":["vidstream","default","mp4upload"],
        "version":"subbed"

        Using the example above, this function will return:
        [('no_extractor', 'https://twist.moe/anime/...')]
        as it prioritizes preferred language over preferred server

        The parent's ``subbed`` setting, when not ``None``, overrides the
        configured language. Servers missing from the config rank after
        every configured server.

        Returns
        -------
        list
            A single ``(extractor, url)`` tuple in a list, or an empty
            list when ``data`` is empty.
        """
        if self._parent and self._parent.subbed is not None:
            version = "subbed" if self._parent.subbed else "dubbed"
        else:
            version = self.config.get('version', 'subbed')

        servers = self.config.get('servers', [''])

        logger.debug('Data : {}'.format(data))

        # Unknown servers must rank below every configured one, so the
        # fallback rank is the number of configured servers (not the
        # number of sources, which can be smaller than a valid index).
        unknown_rank = len(servers)

        def preference(source):
            if source['server'] in servers:
                server_rank = servers.index(source['server'])
            else:
                server_rank = unknown_rank
            # False sorts before True: matching language comes first,
            # server preference breaks ties, input order breaks the rest.
            return (source['version'] != version, server_rank)

        sorted_by_lang = sorted(data, key=preference)
        logger.debug('Sorted sources : {}'.format(sorted_by_lang))

        if not sorted_by_lang:
            return []
        best = sorted_by_lang[0]
        return [(best['extractor'], best['url'])]

    def download(self, force=False, path=None,
                 format='{anime_title}_{ep_no}', range_size=None):
        """
        Downloads episode. This might be removed in a future release.

        Parameters
        ----------
        force: bool
            Whether to force download or not.
        path: string
            Path to the directory/file where the file should be
            downloaded to. A path ending in ``.mp4`` is used as the file
            itself; anything else is treated as a directory.
        format: string
            The format of the filename if not provided. A falsy value
            falls back to ``'{anime_title}_{ep_no}'``.
        range_size:
            Passed through to the downloader unchanged.
        """
        # TODO: Remove this method
        logger.info('Downloading {}'.format(self.pretty_title))

        # Always build a file name so the path handling below cannot hit
        # an unbound variable when ``format`` is empty.
        file_name = util.format_filename(
            format or '{anime_title}_{ep_no}', self) + '.mp4'

        if path is None:
            path = './' + file_name
        elif not path.endswith('.mp4'):
            path = os.path.join(path, file_name)

        Downloader = get_downloader('pySmartDL')
        downloader = Downloader(self.source(),
                                path, force, range_size=range_size)

        downloader.download()
class SearchResult:
    """
    SearchResult class holds the search result of a search done by an Anime
    class

    Parameters
    ----------
    title: str
        Title of the anime.
    url: str
        URL of the anime
    poster: str
        URL for the poster of the anime.
    meta: dict
        Additional metadata regarding the anime.
    meta_info: dict
        Metadata regarding the anime. A fresh empty dict is used when
        omitted.

    Attributes
    ----------
    title: str
        Title of the anime.
    url: str
        URL of the anime
    poster: str
        URL for the poster of the anime.
    meta: dict
        Additional metadata regarding the anime.
    meta_info: dict
        Metadata regarding the anime. Not shown in the results, used to
        match with MAL
    """

    def __init__(self, title, url, poster='', meta='', meta_info=None):
        self.title = title
        self.url = url
        self.poster = poster
        self.meta = meta
        # A literal {} default would be one dict shared by every result.
        self.meta_info = {} if meta_info is None else meta_info

    def __repr__(self):
        return '<SearchResult Title: {} URL: {}>'.format(self.title, self.url)

    def __str__(self):
        return self.title

    @property
    def pretty_metadata(self):
        """
        pretty_metadata is the prettified version of metadata: the values
        of ``meta`` joined with ``' | '``, or an empty string when there
        is no metadata.
        """
        if self.meta:
            # str() keeps non-string values (e.g. numbers) from breaking
            # the join.
            return ' | '.join(str(val) for val in self.meta.values())
        return ''