# Create and activate a Python virtual environment. 
# Before starting the project, try all these imports FIRST
# Address any errors you get running this code cell 
# by installing the necessary packages into your active Python environment.
# Try to resolve issues using your materials and the web.
# If that doesn't work, ask for help in the discussion forums.
# You can't complete the exercises until you import these - start early! 
# We also import Counter and randint (included in the Python Standard Library).

from collections import Counter
from random import randint
import matplotlib.pyplot as plt

print('All prereqs installed.')
!pip list

All prereqs installed.
Package                 Version
----------------------- -----------
asttokens               3.0.0
colorama                0.4.6
comm                    0.2.2
contourpy               1.3.2
cycler                  0.12.1
debugpy                 1.8.14
decorator               5.2.1
executing               2.2.0
fonttools               4.58.4
ipykernel               6.29.5
ipython                 9.3.0
ipython_pygments_lexers 1.1.1
jedi                    0.19.2
jupyter_client          8.6.3
jupyter_core            5.8.1
kiwisolver              1.4.8
matplotlib              3.10.3
matplotlib-inline       0.1.7
nest-asyncio            1.6.0
numpy                   2.3.1
packaging               25.0
pandas                  2.3.0
parso                   0.8.4
pillow                  11.2.1
pip                     25.1.1
platformdirs            4.3.8
prompt_toolkit          3.0.51
psutil                  7.0.0
pure_eval               0.2.3
Pygments                2.19.2
pyparsing               3.2.3
python-dateutil         2.9.0.post0
pytz                    2025.2
pywin32                 310
pyzmq                   27.0.0
six                     1.17.0
stack-data              0.6.3
tornado                 6.5.1
traitlets               5.14.3
tzdata                  2025.2
wcwidth                 0.2.13

# Sample passage used for the frequency plot. It is lower-cased up front so
# that upper- and lower-case forms of the same letter share one count.
TEXT = "At three o’clock precisely I was at Baker Street, but Holmes had not yet returned. The landlady informed me that he had left the house shortly after eight o’clock in the morning. I sat down beside the fire, however, with the intention of awaiting him, however long he might be. I was already deeply interested in his inquiry, for, though it was surrounded by none of the grim and strange features which were associated with the two crimes which I have already recorded, still, the nature of the case and the exalted station of his client gave it a character of its own. Indeed, apart from the nature of the investigation which my friend had on hand, there was something in his masterly grasp of a situation, and his keen, incisive reasoning, which made it a pleasure to me to study his system of work, and to follow the quick, subtle methods by which he disentangled the most inextricable mysteries. So accustomed was I to his invariable success that the very possibility of his failing had ceased to enter into my head.".lower()
# Counter iterates the string one character at a time, so the resulting
# mapping of character -> occurrence count also contains spaces and
# punctuation, not only letters. dict() turns the Counter into a plain dict.
letter_freq = dict(Counter(TEXT))

# make a bar plot with an appropriate title, correct xtick labels, and labeled axes

################################
# Bar chart of how often each character occurs in the passage.

# Order the characters alphabetically and look up the count for each one,
# keeping the two sequences aligned position by position.
chars = tuple(sorted(letter_freq))
freqs = tuple(letter_freq[ch] for ch in chars)

# One wide figure with a single set of axes; one bar per character.
fig, ax = plt.subplots(figsize=(15, 6))
ax.bar(chars, freqs, color='skyblue', edgecolor='black')

# Title and axis labels.
ax.set_title("Character Frequency Distribution")
ax.set_xlabel("Character")
ax.set_ylabel("Frequency")

# Tilt the x tick labels and add dashed horizontal guide lines so the bar
# heights are easier to read off.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid(axis='y', linestyle='--', alpha=0.7)

fig.tight_layout()
plt.show()

# Draw 15 random integers, each between 1 and 21 inclusive, and pair every
# value with its position in the sequence for use as the x coordinate.
values = [randint(1, 21) for _ in range(15)]
indices = [position for position, _ in enumerate(values)]

# Two panels side by side: the same data as a scatter plot and as a line plot.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

ax1.scatter(indices, values, color='tomato')
ax2.plot(indices, values, marker='o', color='mediumseagreen')

# Both panels share the same axis labels; only the title differs.
for panel, heading in ((ax1, "Scatter Plot"), (ax2, "Line Plot")):
    panel.set_title(heading)
    panel.set_xlabel("Index")
    panel.set_ylabel("Random Value")

fig.tight_layout()
plt.show()

# Generate 15 random integers between 1 and 100 (both ends inclusive).
# Call randint directly: this file imports it with
# `from random import randint`, so the module name `random` is never bound
# and `random.randint(...)` would raise NameError.
values = [randint(1, 100) for _ in range(15)]
# x coordinates: the position of each value in the sequence.
indices = list(range(len(values)))

# Create subplots side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

# Scatter plot with custom style: large violet triangles with a black outline
ax1.scatter(indices, values, color='darkviolet', marker='^', s=100, edgecolors='black')
ax1.set_title("Styled Scatter Plot", fontsize=12, fontweight='bold')
ax1.set_xlabel("Index", fontsize=10)
ax1.set_ylabel("Random Value", fontsize=10)
ax1.grid(True, linestyle='--', alpha=0.5)

# Line plot with custom style: thick dashed orange line with blue-filled
# diamond markers at each data point
ax2.plot(indices, values, color='orange', linewidth=2.5, linestyle='--', marker='D', markersize=8, markerfacecolor='blue')
ax2.set_title("Styled Line Plot", fontsize=12, fontweight='bold')
ax2.set_xlabel("Index", fontsize=10)
ax2.set_ylabel("Random Value", fontsize=10)
ax2.grid(True, linestyle=':', alpha=0.5)

# Adjust spacing so titles and labels of the two panels do not overlap.
plt.tight_layout()
plt.show()

# Recorded timings (in seconds) of two sorting algorithms; entry i of each
# timing list corresponds to the input size at sizes[i].
sizes = [10, 20, 50, 100, 1000, 5000]
insertion_sort_times = [0.0010252999999999998, 0.0027026999999999954, 0.010147200000000002, 0.0381137, 3.6303399, 91.2180796]
merge_sort_times = [0.00161889999999687, 0.003635600000009731, 0.0090655000000055, 0.020108000000007564, 0.2687474000000094, 1.6147050999999948]

#############################
# Plot both timing curves on one set of axes so they can be compared.
fig, ax = plt.subplots(figsize=(10, 4))

# (timings, legend label, marker, colour) for each algorithm, in draw order.
series = (
    (insertion_sort_times, 'Insertion Sort', 'o', 'tomato'),
    (merge_sort_times, 'Merge Sort', 's', 'mediumseagreen'),
)
for timings, name, marker_shape, line_colour in series:
    ax.plot(sizes, timings, label=name, marker=marker_shape, color=line_colour)

# Title, axis labels, legend and a light dashed grid.
ax.set_title("Sorting Algorithm Performance")
ax.set_xlabel("Input Size")
ax.set_ylabel("Execution Time (seconds)")
ax.legend()
ax.grid(True, linestyle='--', alpha=0.6)

# Tidy the spacing and render the figure.
fig.tight_layout()
plt.show()

Web Mining and Applied NLP (44-620)

Matplotlib and pyplot

Student Name: Matthew Block

Question 1:

Question 2:

Question 3:

Question 4: