commit 1e5d2fc29b
parent ea6a360bb9
Author: Nova Cat
Date:   2024-12-17 16:24:56 -08:00


@@ -10,6 +10,7 @@ from random import choice, randint as random, randrange
 import traceback
 import threading
 from pathlib import Path
+from urllib.parse import urlparse, parse_qs
 from requests import get, exceptions as rex
 from bs4 import BeautifulSoup
 from googleapiclient.discovery import build
@@ -38,11 +39,25 @@ def script_js():
     return send_file("script.js", mimetype='application/javascript')
 threading.Thread(target=app.run, daemon=True, kwargs={"port": 2005}).start()
+# YouTube API
 DEVELOPER_KEY = environ["ytapi"]
 headers = {
     'User-Agent': 'SweeBot IRC ' + __version__
 }
+def get_yt_id(url):
+    query = urlparse(url)
+    # youtu.be already contains the ID in the path
+    if query.hostname == 'youtu.be': return query.path[1:]
+    if query.hostname in {'www.youtube.com', 'youtube.com', 'music.youtube.com'}:
+        # Path prefixes for URLs that carry the ID in the path instead of the query.
+        integrated_in_url = ["watch", "embed", "v", "shorts"]
+        try:
+            # The regular /watch path stores the ID in the ?v= query parameter.
+            if query.path == '/watch': return parse_qs(query.query)['v'][0]
+            # Otherwise, take the ID from the second path segment (/embed/<id>, /shorts/<id>, ...).
+            elif query.path.split('/')[1] in integrated_in_url: return query.path.split('/')[2]
+        except:
+            return None
 class config:
     def __init__(self):
         self.conn = sqlite3.connect(environ["SBconfig"])
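
As a quick illustration of what the new get_yt_id helper returns for the URL shapes it handles (the video ID below is only an example):

    get_yt_id("https://youtu.be/dQw4w9WgXcQ")                 # "dQw4w9WgXcQ" (ID is the path)
    get_yt_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ")  # "dQw4w9WgXcQ" (ID is the ?v= query value)
    get_yt_id("https://youtube.com/shorts/dQw4w9WgXcQ")       # "dQw4w9WgXcQ" (ID is the second path segment)
    get_yt_id("https://example.com/watch?v=abc")              # None (hostname not recognized)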
@@ -649,39 +664,39 @@ while True:
 if sbconfig.cflagexist(channel, "+links"):
     try:
         for i in command:
-            if i[:8] == "https://":
+            parse = urlparse(i)
+            if parse.scheme in ["http", "https"]:
                 try:
-                    e = get(i, headers=headers, timeout=10)
-                    header = e.headers
-                    content_type = header.get('content-type').split(";")[0]
-                    content_len = header.get('Content-length')
-                    if content_type in allowedparse:
-                        if e.ok:
-                            soup = BeautifulSoup(e.text, 'html.parser')
-                            multiline("(" + nick + ") " + (" ".join(soup.title.string.splitlines())[:100] if soup.title != None else "[No title provided]"), channel)
-                        else:
-                            multiline("(" + nick + ") [HTTP " + str(e.status_code) + "]", channel)
-                    else:
-                        multiline("(" + nick + ") [" + humanbytes(content_len) + " " + str(content_type) + "]", channel)
-                    parsed += 1
+                    try:
+                        if parse.hostname in ["youtube.com", "youtu.be", "www.youtube.com", "m.youtube.com", "youtube-nocookie.com"]:
+                            video_id = get_yt_id(i)
+                            youtube = build('youtube', 'v3', developerKey=DEVELOPER_KEY)
+                            request = youtube.videos().list(part='snippet,statistics', id=video_id)
+                            details = request.execute()
+                            title = details['items'][0]['snippet']['title']
+                            channel_title = details['items'][0]['snippet']['channelTitle']
+                            views = details['items'][0]['statistics']['viewCount']
+                            multiline(f"({nick}) [▶️ YouTube] {title} | Author: {channel_title} | {views} views", channel)
+                        else:
+                            raise Exception("No special URL, go ahead and parse the normal title...")
+                    except:
+                        e = get(i, headers=headers, timeout=10)
+                        header = e.headers
+                        content_type = header.get('content-type').split(";")[0]
+                        content_len = header.get('Content-length')
+                        if content_type in allowedparse:
+                            if e.ok:
+                                soup = BeautifulSoup(e.text, 'html.parser')
+                                multiline("(" + nick + ") " + (" ".join(soup.title.string.splitlines())[:100] if soup.title != None else "[No title provided]"), channel)
+                            else:
+                                multiline("(" + nick + ") [HTTP " + str(e.status_code) + "]", channel)
+                        else:
+                            multiline("(" + nick + ") [" + humanbytes(content_len) + " " + str(content_type) + "]", channel)
+                        parsed += 1
                 except rex.SSLError as ex:
                     multiline("(" + nick + ") [SSL Error: " + str(ex.message) + "]", channel)
                 except Exception as ex:
                     multiline("(" + nick + ") [Request error: " + str(ex.message) + "]", channel)
-            elif i[:7] == "http://":
-                e = get(i, headers=headers, timeout=10)
-                header = e.headers
-                content_type = header.get('content-type').split(";")[0]
-                content_len = header.get('Content-length')
-                if content_type in allowedparse:
-                    if e.ok:
-                        soup = BeautifulSoup(e.text, 'html.parser')
-                        multiline("(" + nick + ") " + (" ".join(soup.title.string.splitlines())[:100] if soup.title != None else "[No title provided]"), channel)
-                    else:
-                        multiline("(" + nick + ") [HTTP " + str(e.status_code) + "]", channel)
-                else:
-                    multiline("(" + nick + ") [" + humanbytes(content_len) + " " + str(content_type) + "]", channel)
-                parsed += 1
     except:
         print(traceback.format_exc())
 elif "JOIN" in text and "#nixsanctuary" in text: