# nopycln: file
from typing import Tuple, List
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from matplotlib.image import AxesImage
from wordcloud import WordCloud

from twixl.collections.twitter import WordFrequencyResults, TweetMetrics
def plot_word_cloud(
    frequencies: pd.Series,
    width: int = 800,
    height: int = 400,
    max_words: int = 200,
    stopwords: Optional[List[str]] = None,
    background_color: str = "white",
    min_word_length: int = 0,
    figsize: Optional[Tuple[float, float]] = None,
) -> AxesImage:
    """Plots the word-frequency list as a wordcloud.

    :param frequencies: item frequencies as generated by any of the
        x_frequencies() methods. The index holds the words, the values
        hold their frequencies.
    :param width: Width of the canvas.
    :param height: Height of the canvas.
    :param max_words: The maximum number of words in the wordcloud.
    :param stopwords: A list of stopwords that should be filtered from the
        wordcloud.
    :param background_color: Background color for the word cloud image.
    :param min_word_length: Minimum number of letters a word must have to
        be included.
    :param figsize: Optional ``(width, height)`` of the matplotlib figure in
        inches. ``None`` keeps matplotlib's default figure size.
    :return: The image artist returned by ``plt.imshow`` for the word cloud.

    Usage::

        >>> from twixl.collections import twitter
        >>> twitter.plotting.plot_word_cloud(
        ...     frequencies,
        ...     stopwords=stopwords,
        ...     max_words=100,
        ...     min_word_length=4
        ... )
        <matplotlib.image.AxesImage>
    """
    # Stopwords are removed here rather than handed to WordCloud, because the
    # cloud is built from precomputed frequencies instead of raw text.
    if stopwords:
        frequencies = frequencies[~frequencies.index.isin(stopwords)]

    # Same for the minimum word length: WordCloud only applies it while
    # tokenising raw text, so without this filter the parameter would have no
    # effect on a cloud generated from frequencies.
    if min_word_length > 0:
        long_enough = frequencies.index.astype(str).str.len() >= min_word_length
        frequencies = frequencies[long_enough]

    wc = WordCloud(
        width=width,
        height=height,
        max_words=max_words,
        background_color=background_color,
        min_word_length=min_word_length,
    ).generate_from_frequencies(frequencies.to_dict())

    # Start a fresh figure so the cloud is not drawn onto an existing plot.
    plt.figure(figsize=figsize)
    # No axis details
    plt.axis("off")
    return plt.imshow(wc)
def plot_circular_bars(
    frequencies: WordFrequencyResults,
    stopwords: Optional[List[str]] = None,
    group_size: int = 5,
) -> Tuple[Figure, Axes]:
    """Plots the word-frequency list as a circular bar plot (currently disabled).

    This function has no active implementation and always raises. The
    previous implementation is kept as commented-out code directly below
    this function for reference.

    :param frequencies: Word frequency results to plot (currently unused).
    :param stopwords: A list of stopwords that should be filtered from the
        plot (currently unused).
    :param group_size: The number of bars per hour (currently unused).
    :raises NotImplementedError: Always.
    """
    raise NotImplementedError(
        "plot_circular_bars is currently disabled and has no implementation"
    )
# """Plots the word-frequency list as a circular bar plot.
# :param stopwords: A list of stopwords that should be filtered from the wordcloud.
# :param group_size: The number of bars per hour.
# Usage::
# >>> from twixl.collections import twitter
# >>> twitter.plotting.plot_circular_bars(
# >>> word_frequencies,
# >>> stopwords=stopwords,
# >>> group_size=3
# >>> );
# (<Figure>, <PolarAxesSubplot>)
# """
# df = word_frequency_results.to_pandas(by_hour=True)
# if stopwords:
# # Filter stopwords from word frequency dataframe.
# df = df[~df["word"].isin(stopwords)]
# GROUPS_SIZE = [group_size] * 24
# df = df.groupby("hour").head(group_size).sort_values(by=["hour", "frequency"])
# # Raise an error if we do not have enough words per day to plot based on the group size
# # TODO: if we want to keep this method around, fix the plotting instead of raising this error
# min_words_per_day = df.groupby("hour").count()["frequency"].min()
# if min_words_per_day < group_size:
# raise ValueError(
# f"maximum group size based on provided data is {min_words_per_day}, but {group_size} expected, please lower group size"
# )
# # Reindex/reverse arrays to plot bars clockwise
# VALUES = df["frequency"].reindex(index=df["frequency"].index[::-1])
# LABELS = df["word"].reindex(index=df["word"].index[::-1])
# BASE_LABELS = [f"{i:02d}:00" for i in range(1, 24)]
# BASE_LABELS.append("00:00")
# BASE_LABELS.reverse()
# GROUP = df["hour"].values
# OFFSET = 0
# PAD = 3
# ANGLES_N = len(VALUES) + PAD * len(np.unique(GROUP)) # type: ignore
# ANGLES = np.linspace(0, 2 * np.pi, num=ANGLES_N, endpoint=False)
# ANGLES = ANGLES + 0.5 * np.pi
# WIDTH = (2 * np.pi) / len(ANGLES)
# offset = OFFSET
# IDXS = []
# for size in GROUPS_SIZE:
# IDXS += list(range(offset + PAD, offset + size + PAD))
# offset += size + PAD
# COLORS = [f"C{i}" for i, size in enumerate(GROUPS_SIZE) for _ in range(size)]
# # Create circular barplot with labels
# fig, ax = plt.subplots(figsize=(16, 16), subplot_kw={"projection": "polar"})
# ax.set_theta_offset(OFFSET)
# ax.set_ylim(-VALUES.max(), VALUES.max())
# ax.set_frame_on(False)
# ax.xaxis.grid(False)
# ax.yaxis.grid(False)
# ax.set_xticks([])
# ax.set_yticks([])
# ax.bar(
# ANGLES[IDXS],
# VALUES,
# width=WIDTH,
# color=COLORS,
# edgecolor="white",
# linewidth=2,
# )
# _add_labels(ANGLES[IDXS], VALUES, LABELS, OFFSET, ax)
# # This iterates over the sizes of the groups adding reference
# # lines and annotations.
# offset = 0
# rotation = 0
# for group, size in zip(BASE_LABELS, GROUPS_SIZE):
# # Calculate position for line below bars
# x1 = np.linspace(ANGLES[offset + PAD], ANGLES[offset + size + PAD - 1], num=50)
# # Plot line below bars
# ax.plot(x1, [-5] * 50, color="#333333")
# # calculate position for reference lines and group text
# x2 = np.linspace(ANGLES[offset], ANGLES[offset + PAD - 1], num=50)
# # Add text with time to indicate group
# ax.text(
# np.mean(x2),
# -150,
# group,
# color="#333333",
# fontsize=10,
# fontweight="bold",
# ha="center",
# va="center",
# rotation=rotation,
# )
# ax.plot(x2, [0] * 50, color="#333333", lw=0.8)
# # Add reference lines at 20%, 40%, 60%, 80%, and 100% of the maximum value
# for position in [0.2, 0.4, 0.6, 0.8, 1]:
# ax.plot(x2, [VALUES.max() * position] * 50, color="#bebebe", lw=0.8)
# offset += size + PAD
# rotation += 15
# else:
# for position in [0.2, 0.4, 0.6, 0.8, 1]:
# ax.text(
# np.mean(x2),
# VALUES.max() * position + 40,
# round(VALUES.max() * position),
# color="#bebebe",
# fontsize=10,
# ha="center",
# va="center",
# rotation=-15,
# )
# return fig, ax
def plot_tweet_metrics(tweet_metrics: TweetMetrics) -> Tuple[Figure, Axes]:
    """Plot the tweet metrics as tweet counts expressed in millions.

    The values returned by ``tweet_metrics.to_pandas()`` are divided by one
    million and drawn on a newly created figure.

    :param tweet_metrics: Tweet metrics to plot.
    :return: The created figure and the axes the data was drawn on.
    """
    figure, axes = plt.subplots()

    # Scale the raw counts so the y-axis reads in millions of tweets.
    tweets_in_millions = tweet_metrics.to_pandas() / 1_000_000
    tweets_in_millions.plot(ax=axes)

    axes.set_xlabel("Timestamp")
    axes.set_ylabel("Number of tweets (millions)")
    return figure, axes
def _get_label_rotation(angle, offset):
    """Compute text rotation and horizontal alignment for a circular bar label.

    :param angle: Angle of the bar, in radians.
    :param offset: Angular offset added to ``angle``, in radians.
    :return: Tuple ``(rotation, alignment)``: the rotation in degrees and
        either ``"right"`` or ``"left"``.
    """
    # Matplotlib expects text rotation in degrees, not radians.
    degrees = np.rad2deg(angle + offset)

    # Labels at or below 1.5 * pi are turned by half a circle and
    # right-aligned; all others keep their rotation and are left-aligned.
    if angle <= 1.5 * np.pi:
        return degrees + 180, "right"
    return degrees, "left"
def _add_labels(angles, values, labels, offset, ax) -> None:
    """Add one text label per bar to a circular bar plot.

    :param angles: Angular position of each bar.
    :param values: Height of each bar; the label is placed just beyond it.
    :param labels: Text to draw for each bar.
    :param offset: Angular offset forwarded to the rotation helper.
    :param ax: Axes object whose ``text`` method is used to draw the labels.
    """
    # Gap between the end of a bar and the start of its label.
    label_gap = 4

    for theta, bar_height, text in zip(angles, values, labels):
        # Rotation and alignment depend on where the bar sits on the circle.
        text_rotation, text_alignment = _get_label_rotation(theta, offset)
        ax.text(
            x=theta,
            y=bar_height + label_gap,
            s=text,
            ha=text_alignment,
            va="center",
            rotation=text_rotation,
            rotation_mode="anchor",
        )