| author | n1 <hrdina.pavel@gmail.com> | 2019-07-27 13:25:13 +0200 |
|---|---|---|
| committer | n1 <hrdina.pavel@gmail.com> | 2019-07-27 13:25:13 +0200 |
| commit | 2f2c732c35e67a1beb9ed760e95b627ebe8d63b7 | |
| tree | 8e5ce9d4b8901654e34d3cdf07e58e30f21328c5 | |
| parent | 106b58b52dc2bf34e64f87be3c54e53fecbf3f88 | |
Added: top news method. (tag: 0.2.5)
| -rw-r--r-- | README.rst | 63 |
| -rw-r--r-- | karpet/core.py | 199 |
| -rw-r--r-- | karpet/meta.py | 2 |
| -rw-r--r-- | test_karpet.py | 29 |
4 files changed, 222 insertions, 71 deletions
diff --git a/README.rst b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -61,8 +61,8 @@ Symbol (ticker) -> coinmarketcap.com URL slug conversion.
 
 .. code-block:: python
 
-    c = Karpet()
-    c.get_coin_slug("BTC") # bitcoin
+    k = Karpet()
+    k.get_coin_slug("BTC") # bitcoin
 
 ``fetch_historical_data()``
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -70,8 +70,8 @@ Retrieves historical data.
 
 .. code-block:: python
 
-    c = Karpet(date(2019, 1, 1), date(2019, 5, 1))
-    df = c.fetch_crypto_historical_data(coin="bitcoin") # Dataframe with historical data.
+    k = Karpet(date(2019, 1, 1), date(2019, 5, 1))
+    df = k.fetch_crypto_historical_data(symbol="btc") # Dataframe with historical data.
     df.head()
 
 .. image:: https://raw.githubusercontent.com/im-n1/karpet/master/assets/historical_data.png
@@ -82,8 +82,8 @@ Retrieves exchange list.
 
 .. code-block:: python
 
-    c = Karpet()
-    c.fetch_exchanges("nrg")
+    k = Karpet()
+    k.fetch_exchanges("nrg")
     ['DigiFinex', 'KuCoin', 'CryptoBridge', 'Bitbns', 'CoinExchange']
 
 ``fetch_tweets()``
@@ -92,8 +92,8 @@ Retrieves Twitter tweets.
 
 .. code-block:: python
 
-    c = Karpet(date(2019, 1, 1), date(2019, 5, 1))
-    df = c.fetch_tweets(kw_list=["bitcoin"], lang="en") # Dataframe with tweets.
+    k = Karpet(date(2019, 1, 1), date(2019, 5, 1))
+    df = k.fetch_tweets(kw_list=["bitcoin"], lang="en") # Dataframe with tweets.
     df.head()
 
 .. image:: https://raw.githubusercontent.com/im-n1/karpet/master/assets/tweets.png
@@ -104,8 +104,8 @@ Retrieves Google Trends (in percent) for the given date range.
 
 .. code-block:: python
 
-    c = Karpet(date(2019, 1, 1), date(2019, 5, 1))
-    df = c.fetch_google_trends(kw_list=["bitcoin"]) # Dataframe with trends.
+    k = Karpet(date(2019, 1, 1), date(2019, 5, 1))
+    df = k.fetch_google_trends(kw_list=["bitcoin"]) # Dataframe with trends.
     df.head()
 
 .. image:: https://raw.githubusercontent.com/im-n1/karpet/master/assets/google_trends.png
@@ -125,8 +125,8 @@ Retrieves crypto news.
 
 .. code-block:: python
 
-    c = Karpet()
-    news = c.fetch_news("btc") # Gets 10 news items.
+    k = Karpet()
+    news = k.fetch_news("btc") # Gets 10 news items.
     print(news[0])
     {
         'url': 'https://cointelegraph.com/ ....', # Truncated.
@@ -135,10 +135,47 @@ Retrieves crypto news.
         'date': datetime.datetime(2019, 7, 10, 19, 0, 13),
         'image': 'https://images.cointelegraph.com/....jpg' # Truncated.
     }
-    news = c.fetch_news("btc", limit=30) # Gets 30 news items.
+    news = k.fetch_news("btc", limit=30) # Gets 30 news items.
+
+``fetch_top_news()``
+~~~~~~~~~~~~~~~~~~~~
+Retrieves top crypto news in 2 categories:
+
+* Editor's choice - articles picked by editors
+* Hot stories - articles with the most views
+
+.. code-block:: python
+
+    k = Karpet()
+    editors_choice, top_stories = k.fetch_top_news()
+    print(len(editors_choice))
+    5
+    print(len(top_stories))
+    5
+    print(editors_choice[0])
+    {
+        'url': 'https://cointelegraph.com/...', # Truncated.
+        'title': 'Bank of China’s New Infographic Shows Why Bitcoin Price Is Going Up',
+        'date': '2019-07-27T10:07:00+01:00',
+        'image': 'https://images.cointelegraph.com/images/740_aHR...', # Truncated.
+        'description': 'The Chinese central bank released on its website an ...' # Truncated.
+    }
+    print(top_stories[0])
+    {
+        'url': 'https://cointelegraph.com/...', # Truncated.
+        'title': 'Bitcoin Price Shuns Volatility as Analysts Warn of Potential Drop to $7,000',
+        'date': '2019-07-22T09:21:00+01:00',
+        'image': 'https://images.cointelegraph.com/images/740_aHR0c...', # Truncated.
+        'description': 'Stability around $10,600 for Bitcoin price is ...' # Truncated.
+    }
+
 Changelog
 ---------
+0.2.5
+~~~~~
+* Added ``fetch_top_news()`` method for top crypto news, separated into 2 categories.
+
 0.2.4
 ~~~~~
 * ``fetch_news()`` adds a new "description" item and renames "image_url" to "image".
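Taken together, the README examples above map onto a short script. A minimal usage sketch, assuming karpet 0.2.5 is installed, ``Karpet`` is importable from the package root, and the coincodex.com/cointelegraph.com endpoints are reachable:

.. code-block:: python

    from karpet import Karpet

    k = Karpet()

    # Plain news feed (coincodex.com), 10 items by default.
    news = k.fetch_news("btc")

    # Top news scraped from the cointelegraph.com front page.
    editors_choice, hot_stories = k.fetch_top_news()

    # Every item is a plain dict with url/title/date/image/description.
    for article in editors_choice:
        print(article["date"], article["title"])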
diff --git a/karpet/core.py b/karpet/core.py
index 65d663a..ad44c8b 100644
--- a/karpet/core.py
+++ b/karpet/core.py
@@ -11,9 +11,8 @@
 import requests
 import aiohttp
 import re
-from datetime import timedelta, datetime
+from datetime import timedelta
 import time
-import sys
 import asyncio
@@ -311,8 +310,24 @@ class Karpet:
         * description
         * date
         * image
+
+        :param str symbol: Coin symbol the news will be fetched for.
+        :param int limit: Limit for the news count.
         """
+
+        def get_news(symbol, limit):
+            """
+            Fetches news from coincodex.com.
+
+            :return: List of news objects - {"url": "..."}.
+            :rtype: list
+            """
+
+            url = f"https://coincodex.com/api/coincodexicos/get_news/{symbol}/{limit}/1/"
+            data = self._get_json(url)
+
+            return [{"url": d["url"]} for d in data]
+
         def get_coin_slug(symbol):
             """
             Determines the coincodex.com URL slug for the given
@@ -339,78 +354,158 @@ class Karpet:
             if c["symbol"].upper() == symbol.upper():
                 return c["shortname"]
 
-        async def fetch_features(news):
+        # Fetch features.
+        news = get_news(symbol, limit)
+        asyncio.run(self._fetch_news_features(news))
+
+        return self._drop_bad_news(news)[:limit]
+
+    def fetch_top_news(self):
+        """
+        Fetches top crypto news. Returns editor's choice and hot stories.
+
+        * url
+        * title
+        * description
+        * date
+        * image
+
+        :return: Tuple where the first item is editor's choice news and the second is hot stories.
+        :rtype: tuple
+        """
+
+        def get_top_news():
             """
-            Asynchronously fetches all news features.
+            Fetches editor's choice and hot stories from the cointelegraph.com front page.
 
-            :param list news: List of news objects.
+            :return: Tuple of (editors_choice, hot_stories) news lists.
+            :rtype: tuple
             """
 
-            async def fetch_all(session, news):
+            headers = {
+                "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0",
+            }
+            response = requests.get("https://cointelegraph.com/", headers=headers)
+            dom = BeautifulSoup(response.text, "lxml")
+
+            def parse_section_news(section):
                 """
-                Fetches all news features.
+                Parses a news section. The section consists of titles and
+                links to the news; only the links are parsed out.
 
-                :param aiohttp.ClientSession session: Session instance.
-                :param list news: List of news objects.
+                :param object section: BeautifulSoup element object of the section.
+                :return: List of news objects - {"url": "..."}.
+                :rtype: list
                 """
 
-                await asyncio.gather(*[fetch_one(session, n) for n in news])
+                news_items = section.find_all(class_="main-news-tabs__item")
+                news = []
 
-            async def fetch_one(session, news):
-                """
-                Fetches a few features to the given news object. Features
-                are set directly to the news object.
-                Fetched features are:
+                if len(news_items):
 
-                * image
-                * description
+                    for i in news_items:
+                        news.append({"url": i.find("a")["href"]})
 
-                :param aiohttp.ClientSession session: Session instance.
-                :param object news: News object.
-                """
+                return news
+
+            editors_choice, hot_stories = dom.find_all(class_="main-news-tabs__list")
+
+            return parse_section_news(editors_choice), parse_section_news(hot_stories)
+
+        # Fetch features.
+        editors_choice, hot_stories = get_top_news()
+        asyncio.run(self._fetch_news_features(editors_choice))
+        asyncio.run(self._fetch_news_features(hot_stories))
 
-            async with session.get(news["url"]) as response:
+        return editors_choice, hot_stories
 
-                html = await response.text()
-                dom = BeautifulSoup(html, features="lxml")
+    async def _fetch_news_features(self, news):
+        """
+        Asynchronously fetches all news features.
 
-                # Image.
-                try:
-                    news["image"] = dom.find("meta", {"property": "og:image"})["content"]
-                except:
-                    news["image"] = None
+        :param list news: List of news objects.
+        """
 
-                # Description.
-                try:
-                    news["description"] = dom.find("meta", {"property": "og:description"})["content"]
-                except:
-                    news["description"] = None
+        async def fetch_all(session, news):
+            """
+            Fetches all news features.
 
-            async with aiohttp.ClientSession() as session:
-                await fetch_all(session, news)
+            :param aiohttp.ClientSession session: Session instance.
+            :param list news: List of news objects.
+            """
 
-        url = f"https://coincodex.com/api/coincodexicos/get_news/{symbol}/{limit}/1/"
-        data = self.get_json(url)
+            await asyncio.gather(*[fetch_one(session, n) for n in news])
 
-        news = []
+        async def fetch_one(session, news):
+            """
+            Fetches a few features for the given news object. The features
+            are set directly on the news object.
+            Fetched features are:
 
-        for n in data:
-            try:
-                news.append({
-                    "url": n["url"],
-                    "title": n["title"],
-                    "date": datetime.strptime(n["date"], "%Y-%m-%d %H:%M:%S")
-                })
-            except:
-                tb = sys.exc_info()[2]
-                raise Exception("Couldn't parse news. Skipping...").with_traceback(tb)
+            * date
+            * image
+            * description
+
+            :param aiohttp.ClientSession session: Session instance.
+            :param object news: News object.
+            """
+
+            async with session.get(news["url"]) as response:
+
+                html = await response.text()
+                dom = BeautifulSoup(html, features="lxml")
+
+                # Title.
+                try:
+                    news["title"] = dom.find("meta", {"property": "og:title"})["content"]
+                except:
+                    news["title"] = None
+
+                # Date.
+                try:
+                    news["date"] = dom.find("meta", {"property": "article:published_time"})["content"]
+                except:
+                    news["date"] = None
+
+                # Image.
+                try:
+                    news["image"] = dom.find("meta", {"property": "og:image"})["content"]
+                except:
+                    news["image"] = None
+
+                # Description.
+                try:
+                    news["description"] = dom.find("meta", {"property": "og:description"})["content"]
+                except:
+                    news["description"] = None
+
+        async with aiohttp.ClientSession() as session:
+            await fetch_all(session, news)
+
+    def _drop_bad_news(self, news):
+        """
+        Drops news that doesn't meet the following requirement:
+
+        * must have a published date (date)
+
+        :param list news: List of news.
+        :return: Filtered list of news.
+        :rtype: list
+        """
+
+        filtered_news = []
+
+        for n in news:
+            if not n["date"]:
+                continue
 
-        # Fetch news features.
-        asyncio.run(fetch_features(news))
+            filtered_news.append(n)
 
-        return news
+        return filtered_news
 
-    def get_json(self, url):
+    def _get_json(self, url):
         """
         Downloads data from the given URL and parses it as JSON.
         Handles exceptions and raises its own with sane messages.
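The ``_fetch_news_features()`` coroutine above is an instance of a standard fan-out pattern: one shared ``aiohttp`` session, ``asyncio.gather()`` over per-article downloads, and Open Graph ``<meta>`` tags pulled from each page. A self-contained sketch of that pattern, assuming ``aiohttp``, ``beautifulsoup4`` and ``lxml`` are installed; the URL is a placeholder, not from the commit:

.. code-block:: python

    import asyncio

    import aiohttp
    from bs4 import BeautifulSoup

    def meta_content(dom, prop):
        """Returns the content of a <meta property=...> tag, or None if missing."""
        tag = dom.find("meta", {"property": prop})
        return tag["content"] if tag else None

    async def fetch_one(session, item):
        # Download the article page and enrich the dict in place.
        async with session.get(item["url"]) as response:
            dom = BeautifulSoup(await response.text(), "lxml")
            item["title"] = meta_content(dom, "og:title")
            item["date"] = meta_content(dom, "article:published_time")
            item["image"] = meta_content(dom, "og:image")
            item["description"] = meta_content(dom, "og:description")

    async def fetch_all(items):
        # One session for all requests; gather() runs the downloads concurrently.
        async with aiohttp.ClientSession() as session:
            await asyncio.gather(*[fetch_one(session, i) for i in items])

    items = [{"url": "https://example.com/article"}]  # Placeholder URL.
    asyncio.run(fetch_all(items))
    print(items[0]["title"])

Checking the tag for ``None`` stands in for the bare ``except:`` clauses in the committed code, which would also swallow unrelated errors raised inside the parser.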
diff --git a/karpet/meta.py b/karpet/meta.py
index c698071..fdbef70 100644
--- a/karpet/meta.py
+++ b/karpet/meta.py
@@ -1,2 +1,2 @@
-__version__ = "0.2.4.1"
+__version__ = "0.2.5"
 __description__ = "Library for fetching coin/token metrics data from the internet."
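On the scraping side, ``fetch_top_news()`` hinges on two cointelegraph.com CSS classes: ``main-news-tabs__list`` for the two sections and ``main-news-tabs__item`` for the entries inside them. A sketch of the same parsing against inline sample HTML (the markup is illustrative, not the live page):

.. code-block:: python

    from bs4 import BeautifulSoup

    # Illustrative markup mimicking the structure the commit parses.
    html = """
    <ul class="main-news-tabs__list">
      <li class="main-news-tabs__item"><a href="https://example.com/a">A</a></li>
      <li class="main-news-tabs__item"><a href="https://example.com/b">B</a></li>
    </ul>
    <ul class="main-news-tabs__list">
      <li class="main-news-tabs__item"><a href="https://example.com/c">C</a></li>
    </ul>
    """

    dom = BeautifulSoup(html, "lxml")

    def parse_section_news(section):
        # Keep only the link of each item, like the committed helper does.
        return [
            {"url": item.find("a")["href"]}
            for item in section.find_all(class_="main-news-tabs__item")
        ]

    editors_choice, hot_stories = dom.find_all(class_="main-news-tabs__list")
    print(parse_section_news(editors_choice))  # [{'url': 'https://example.com/a'}, ...]
    print(parse_section_news(hot_stories))     # [{'url': 'https://example.com/c'}]

Note that the tuple unpacking of ``dom.find_all(...)`` assumes exactly two such lists exist on the page; a front-page redesign would raise ``ValueError`` there, which is the usual fragility of class-based scraping.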
diff --git a/test_karpet.py b/test_karpet.py
index c0f93f8..222337f 100644
--- a/test_karpet.py
+++ b/test_karpet.py
@@ -45,10 +45,10 @@ def test_fetch_tweets():
 
 def test_fetch_news():
 
-    c = Karpet()
-    news = c.fetch_news("eth")
+    k = Karpet()
+    news = k.fetch_news("eth")
 
-    assert len(news) == 10
+    assert len(news) > 0
     assert "url" in news[0]
     assert "title" in news[0]
     assert "date" in news[0]
@@ -56,6 +56,25 @@
 
 def test_fetch_news_with_limit():
 
-    c = Karpet()
+    k = Karpet()
+    news = k.fetch_news("eth", limit=30)
+
+    assert 0 < len(news) <= 30
+    print(f"Fetched {len(news)} news items.")
+
+
+def test_fetch_top_news():
+
+    k = Karpet()
+    editors_choice, hot_stories = k.fetch_top_news()
+
+    assert len(editors_choice) == 5
+    assert len(hot_stories) == 5
+
+    assert "url" in editors_choice[0]
+    assert "title" in editors_choice[0]
+    assert "date" in editors_choice[0]
 
-    assert len(c.fetch_news("eth", limit=30)) == 30
+    assert "url" in hot_stories[0]
+    assert "title" in hot_stories[0]
+    assert "date" in hot_stories[0]
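The tests above hit live endpoints, so they can fail on front-page layout changes or a missing network. The new ``_drop_bad_news()`` helper, by contrast, is pure and can be pinned down offline; a possible additional test, assuming ``Karpet`` is importable as in the existing test module:

.. code-block:: python

    from karpet import Karpet

    def test_drop_bad_news_requires_date():
        """_drop_bad_news() keeps only items with a published date - no network needed."""

        k = Karpet()
        news = [
            {"url": "https://example.com/a", "date": "2019-07-27T10:07:00+01:00"},
            {"url": "https://example.com/b", "date": None},  # Should be dropped.
        ]

        filtered = k._drop_bad_news(news)

        assert len(filtered) == 1
        assert filtered[0]["url"] == "https://example.com/a"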