author    n1 <hrdina.pavel@gmail.com>    2019-07-27 13:25:13 +0200
committer n1 <hrdina.pavel@gmail.com>    2019-07-27 13:25:13 +0200
commit    2f2c732c35e67a1beb9ed760e95b627ebe8d63b7 (patch)
tree      8e5ce9d4b8901654e34d3cdf07e58e30f21328c5
parent    106b58b52dc2bf34e64f87be3c54e53fecbf3f88 (diff)

Added: top news method. (tag: 0.2.5)

-rw-r--r--  README.rst       63
-rw-r--r--  karpet/core.py  199
-rw-r--r--  karpet/meta.py    2
-rw-r--r--  test_karpet.py   29

4 files changed, 222 insertions(+), 71 deletions(-)
diff --git a/README.rst b/README.rst
index a781642..7646176 100644
--- a/README.rst
+++ b/README.rst
@@ -61,8 +61,8 @@ Symbol (ticker) -> coinmarketcap.com URL slug conversion.
.. code-block:: python
- c = Karpet()
- c.get_coin_slug("BTC") # bitcoin
+ k = Karpet()
+ k.get_coin_slug("BTC") # bitcoin
``fetch_crypto_historical_data()``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -70,8 +70,8 @@ Retrieves historical data.
.. code-block:: python
- c = Karpet(date(2019, 1, 1), date(2019, 5, 1))
- df = c.fetch_crypto_historical_data(coin="bitcoin") # Dataframe with historical data
+ k = Karpet(date(2019, 1, 1), date(2019, 5, 1))
+ df = k.fetch_crypto_historical_data(symbol="btc") # Dataframe with historical data.
df.head()
.. image:: https://raw.githubusercontent.com/im-n1/karpet/master/assets/historical_data.png
@@ -82,8 +82,8 @@ Retrieves exchange list.
.. code-block:: python
- c = Karpet()
- c.fetch_exchanges("nrg")
+ k = Karpet()
+ k.fetch_exchanges("nrg")
['DigiFinex', 'KuCoin', 'CryptoBridge', 'Bitbns', 'CoinExchange']
``fetch_tweets()``
@@ -92,8 +92,8 @@ Retrieves Twitter tweets.
.. code-block:: python
- c = Karpet(date(2019, 1, 1), date(2019, 5, 1))
- df = c.fetch_tweets(kw_list=["bitcoin"], lang="en") # Dataframe with tweets.
+ k = Karpet(date(2019, 1, 1), date(2019, 5, 1))
+ df = k.fetch_tweets(kw_list=["bitcoin"], lang="en") # Dataframe with tweets.
df.head()
.. image:: https://raw.githubusercontent.com/im-n1/karpet/master/assets/tweets.png
@@ -104,8 +104,8 @@ Retrieves Google Trends - in percent for the given date range.
.. code-block:: python
- c = Karpet(date(2019, 1, 1), date(2019, 5, 1))
- df = c.fetch_google_trends(kw_list=["bitcoin"]) # Dataframe with trends.
+ k = Karpet(date(2019, 1, 1), date(2019, 5, 1))
+ df = k.fetch_google_trends(kw_list=["bitcoin"]) # Dataframe with trends.
df.head()
.. image:: https://raw.githubusercontent.com/im-n1/karpet/master/assets/google_trends.png
@@ -125,8 +125,8 @@ Retrieves crypto news.
.. code-block:: python
- c = Karpet()
- news = c.fetch_news("btc") # Gets 10 news.
+ k = Karpet()
+ news = k.fetch_news("btc") # Gets 10 news.
print(news[0])
{
'url': 'https://cointelegraph.com/ ....', # Truncated.
@@ -135,10 +135,47 @@ Retrieves crypto news.
'date': datetime.datetime(2019, 7, 10, 19, 0, 13),
'image': 'https://images.cointelegraph.com/....jpg' # Truncated.
}
- news = c.fetch_news("btc", limit=30) # Gets 30 news.
+ news = k.fetch_news("btc", limit=30) # Gets 30 news.
+
+``fetch_top_news()``
+~~~~~~~~~~~~~~~~~~~~
+Retrieves top crypto news in 2 categories:
+
+* Editor's choice - articles picked by editors
+* Hot stories - articles with the most views
+
+.. code-block:: python
+
+ k = Karpet()
+    editors_choice, hot_stories = k.fetch_top_news()
+    print(len(editors_choice))
+    5
+    print(len(hot_stories))
+    5
+    print(editors_choice[0])
+ {
+ 'url': 'https://cointelegraph.com/...', # Truncated.
+ 'title': 'Bank of China’s New Infographic Shows Why Bitcoin Price Is Going Up',
+ 'date': '2019-07-27T10:07:00+01:00',
+ 'image': 'https://images.cointelegraph.com/images/740_aHR...', # Truncated.
+ 'description': 'The Chinese central bank released on its website an ...' # Truncated.
+ }
+    print(hot_stories[0])
+ {
+ 'url': 'https://cointelegraph.com/...', # Truncated.
+ 'title': 'Bitcoin Price Shuns Volatility as Analysts Warn of Potential Drop to $7,000',
+ 'date': '2019-07-22T09:21:00+01:00',
+        'image': 'https://images.cointelegraph.com/images/740_aHR0c...', # Truncated.
+ 'description': 'Stability around $10,600 for Bitcoin price is ...' # Truncated.
+ }
+
Changelog
---------
+0.2.5
+~~~~~
+* Added ``fetch_top_news()`` method for top crypto news split into 2 categories.
+
0.2.4
~~~~~
* ``fetch_news()`` adds new "description" item and renames "image_url" to "image".
diff --git a/karpet/core.py b/karpet/core.py
index 65d663a..ad44c8b 100644
--- a/karpet/core.py
+++ b/karpet/core.py
@@ -11,9 +11,8 @@ import requests
import aiohttp
import re
-from datetime import timedelta, datetime
+from datetime import timedelta
import time
-import sys
import asyncio
@@ -311,8 +310,24 @@ class Karpet:
* description
* date
* image
+
+ :param str symbol: Coin symbol the news will be fetched for.
+ :param int limit: Limit for news count.
"""
+ def get_news(symbol, limit):
+ """
+ Fetches news from coincodex.com.
+
+            :return: List of news URLs.
+ :rtype: list
+ """
+
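+            # The endpoint path appears to be {symbol}/{limit}/{page}; the
+            # trailing "1" is presumably the first results page (the API is
+            # undocumented, so this is an assumption).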
+ url = f"https://coincodex.com/api/coincodexicos/get_news/{symbol}/{limit}/1/"
+ data = self._get_json(url)
+
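+            # Keep only the URL here; title, date, image and description are
+            # filled in later by _fetch_news_features().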
+ return [{"url": d["url"]} for d in data]
+
def get_coin_slug(symbol):
"""
Determines coin coincodex.com URL slug for the given
@@ -339,78 +354,158 @@ class Karpet:
if c["symbol"].upper() == symbol.upper():
return c["shortname"]
- async def fetch_features(news):
+ # Fetch features.
+ news = get_news(symbol, limit)
+ asyncio.run(self._fetch_news_features(news))
+
+ return self._drop_bad_news(news)[:limit]
+
+ def fetch_top_news(self):
+ """
+ Fetches top crypto news. Returns Editor's choice and Hot stories.
+
+ * url
+ * title
+ * description
+ * date
+ * image
+
+        :return: Tuple where the first item is editor's choice news and the second is hot stories.
+ :rtype: tuple
+ """
+
+ def get_top_news():
"""
- Asynchronously fetches all news features.
+            Fetches editor's choice and hot stories from the cointelegraph.com front page.
- :param list news: List of news objects.
+            :return: Tuple with ``editors_choice`` and ``hot_stories`` news lists.
+            :rtype: tuple
"""
- async def fetch_all(session, news):
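+            # A browser-like User-Agent; presumably the front page rejects
+            # requests that look like bots (assumption, not verified).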
+ headers = {
+ "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0",
+ }
+ response = requests.get("https://cointelegraph.com/", headers=headers)
+ dom = BeautifulSoup(response.text, "lxml")
+
+ def parse_section_news(section):
"""
- Fetches all news features.
+                Parses a section with news. The section consists of titles
+                and links to the news; only the links are parsed out.
- :param aiohttp.ClientSession session: Session instance.
- :param list news: List of news objects.
+                :param object section: BeautifulSoup element object of the section.
+ :return: List of news objects - {"url": "..."}.
+ :rtype: list
"""
- await asyncio.gather(*[fetch_one(session, n) for n in news])
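+                # The CSS class names mirror cointelegraph.com's markup at the
+                # time of writing; a site redesign will silently break this.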
+ news_items = section.find_all(class_="main-news-tabs__item")
+ news = []
- async def fetch_one(session, news):
- """
- Fetches a few features to the given news object. Features
- are set directly to the news object.
- Fetched features are:
+                if news_items:
- * image
- * description
+ for i in news_items:
+ news.append({"url": i.find("a")["href"]})
- :param aiohttp.ClientSession session: Session instance.
- :param object news: News object.
- """
+ return news
+
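+            # Unpacking assumes the front page carries exactly two such lists,
+            # editor's choice first; a layout change raises ValueError here.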
+ editors_choice, hot_stories = dom.find_all(class_="main-news-tabs__list")
+
+ return parse_section_news(editors_choice), parse_section_news(hot_stories)
+
+ # Fetch features.
+ editors_choice, hot_stories = get_top_news()
+ asyncio.run(self._fetch_news_features(editors_choice))
+ asyncio.run(self._fetch_news_features(hot_stories))
- async with session.get(news["url"]) as response:
+ return editors_choice, hot_stories
- html = await response.text()
- dom = BeautifulSoup(html, features="lxml")
+ async def _fetch_news_features(self, news):
+ """
+ Asynchronously fetches all news features.
- # Image.
- try:
- news["image"] = dom.find("meta", {"property": "og:image"})["content"]
- except:
- news["image"] = None
+ :param list news: List of news objects.
+ """
- # Description.
- try:
- news["description"] = dom.find("meta", {"property": "og:description"})["content"]
- except:
- news["description"] = None
+ async def fetch_all(session, news):
+ """
+ Fetches all news features.
- async with aiohttp.ClientSession() as session:
- await fetch_all(session, news)
+ :param aiohttp.ClientSession session: Session instance.
+ :param list news: List of news objects.
+ """
- url = f"https://coincodex.com/api/coincodexicos/get_news/{symbol}/{limit}/1/"
- data = self.get_json(url)
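+            # Run every request concurrently; fetch_one() mutates each news
+            # dict in place, so there is nothing to collect here.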
+ await asyncio.gather(*[fetch_one(session, n) for n in news])
- news = []
+ async def fetch_one(session, news):
+ """
+            Fetches a few features for the given news object. Features
+            are set directly on the news object.
+ Fetched features are:
- for n in data:
- try:
- news.append({
- "url": n["url"],
- "title": n["title"],
- "date": datetime.strptime(n["date"], "%Y-%m-%d %H:%M:%S")
- })
- except:
- tb = sys.exc_info()[2]
- raise Exception("Couldn't parse news. Skipping...").with_traceback(tb)
+ * date
+ * image
+ * description
+
+ :param aiohttp.ClientSession session: Session instance.
+ :param object news: News object.
+ """
+
+ async with session.get(news["url"]) as response:
+
+ html = await response.text()
+ dom = BeautifulSoup(html, features="lxml")
+
+ # Title.
+ try:
+ news["title"] = dom.find("meta", {"property": "og:title"})["content"]
+                except Exception:
+ news["title"] = None
+
+ # Date.
+ try:
+ news["date"] = dom.find("meta", {"property": "article:published_time"})["content"]
+                except Exception:
+ news["date"] = None
+
+ # Image.
+ try:
+ news["image"] = dom.find("meta", {"property": "og:image"})["content"]
+                except Exception:
+ news["image"] = None
+
+ # Description.
+ try:
+ news["description"] = dom.find("meta", {"property": "og:description"})["content"]
+                except Exception:
+ news["description"] = None
+
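+        # One shared session so connections are reused across all fetches.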
+ async with aiohttp.ClientSession() as session:
+ await fetch_all(session, news)
+
+ def _drop_bad_news(self, news):
+ """
+        Drops news items that don't meet the following requirements.
+
+ * must have published date (date)
+
+ :param list news: List of news.
+ :return: Filtered list of news.
+ :rtype: list
+ """
+
+ filtered_news = []
+
+ for n in news:
+ if not n["date"]:
+ continue
- # Fetch news features.
- asyncio.run(fetch_features(news))
+ filtered_news.append(n)
- return news
+ return filtered_news
- def get_json(self, url):
+ def _get_json(self, url):
"""
Downloads data from the given URL and parses them as JSON.
Handles exceptions and raises its own with sane messages.
diff --git a/karpet/meta.py b/karpet/meta.py
index c698071..fdbef70 100644
--- a/karpet/meta.py
+++ b/karpet/meta.py
@@ -1,2 +1,2 @@
-__version__ = "0.2.4.1"
+__version__ = "0.2.5"
__description__ = "Library for fetching coin/token metrics data from the internet."
diff --git a/test_karpet.py b/test_karpet.py
index c0f93f8..222337f 100644
--- a/test_karpet.py
+++ b/test_karpet.py
@@ -45,10 +45,10 @@ def test_fetch_tweets():
def test_fetch_news():
- c = Karpet()
- news = c.fetch_news("eth")
+ k = Karpet()
+ news = k.fetch_news("eth")
- assert len(news) == 10
+ assert len(news) > 0
assert "url" in news[0]
assert "title" in news[0]
assert "date" in news[0]
@@ -56,6 +56,25 @@ def test_fetch_news():
def test_fetch_news_with_limit():
- c = Karpet()
+ k = Karpet()
+ news = k.fetch_news("eth", limit=30)
+
+ assert 0 < len(news) <= 30
+    print(f"Fetched {len(news)} news items.")
+
+
+def test_fetch_top_news():
+
+ k = Karpet()
+ editors_choice, hot_stories = k.fetch_top_news()
+
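+    # The front page shows 5 items per list at the time of writing; these
+    # asserts will break if cointelegraph.com changes that.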
+ assert len(editors_choice) == 5
+ assert len(hot_stories) == 5
+
+ assert "url" in editors_choice[0]
+ assert "title" in editors_choice[0]
+ assert "date" in editors_choice[0]
- assert len(c.fetch_news("eth", limit=30)) == 30
+ assert "url" in hot_stories[0]
+ assert "title" in hot_stories[0]
+ assert "date" in hot_stories[0]