VLC_HTTP_LAUNCHER/scrapper.py

43 lines
1.1 KiB
Python
Raw Normal View History

2023-05-16 16:33:36 -04:00
from bs4 import BeautifulSoup
from urllib.parse import quote, unquote
import utils
import sys
2023-05-16 16:33:36 -04:00
import requests
def get_url(site: dict, path: str):
    """Build the full URL for ``path`` on ``site``.

    The base URL is obtained from ``utils.get_base_url(site)``; ``path`` is
    percent-encoded with ``urllib.parse.quote`` and appended to it.
    """
    base_url = utils.get_base_url(site)
    encoded_path = quote(path)
    return base_url + encoded_path
2023-05-16 17:25:05 -04:00
def get_files(site: dict, path: str) -> list:
    """Return the link targets listed at ``path`` on ``site``.

    The URL is built with ``get_url`` and the page is fetched and parsed by
    ``get_files_by_url``; its result is returned unchanged.
    """
    return get_files_by_url(get_url(site, path))
def get_files_by_url(url: str) -> list:
    """Fetch ``url`` and return the decoded ``href`` of every link on the page.

    Args:
        url: Address of the page to fetch.

    Returns:
        The percent-decoded ``href`` values of the page's ``<a>`` tags, in
        document order. An empty list is returned without any request when
        ``url`` is one of the parent-directory links "/../" or "../".

    Side effects:
        If the response status is not 200, the reason and the URL are
        printed and the process terminates through ``sys.exit(1)``.
    """
    if url in ("/../", "../"):
        return []
    response = requests.get(url)
    if response.status_code != 200:
        print("connection:", response.reason)
        print(url)
        sys.exit(1)
    soup = BeautifulSoup(response.text, 'html.parser')
    # href=True keeps only anchors that carry an href attribute, so anchors
    # without one (e.g. <a name="top">) no longer raise KeyError.
    return [unquote(anchor["href"]) for anchor in soup.find_all("a", href=True)]
2023-05-16 16:33:36 -04:00
def get_uri(url: str) -> "str | list":
    """Return the text of the page's first ``<h1>`` without its first 9 characters.

    Args:
        url: Address of the page to fetch.

    Returns:
        * ``[]`` when ``url`` is one of the parent-directory links "/../" or
          "../" (no request is made).
        * The ``<h1>`` text with its first nine characters removed on
          success.
        * ``""`` when the response status is not 200 (the reason is printed
          first), or when fetching or parsing fails, e.g. the page has no
          ``<h1>``.
    """
    if url in ("/../", "../"):
        return []
    try:
        response = requests.get(url)
        if response.status_code != 200:
            print("connection:", response.reason)
            # Report the failure and fall back to an empty string; this
            # function is best-effort and does not terminate the process.
            return ""
        soup = BeautifulSoup(response.text, 'html.parser')
        # NOTE(review): the 9-character slice presumably strips an
        # "Index of " prefix from a directory-listing heading - confirm.
        return soup.find("h1").text[9:]
    except Exception:
        # Best-effort lookup: any ordinary failure yields "". Catching
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return ""