import sys
from urllib.parse import quote, unquote

import requests
from bs4 import BeautifulSoup

import utils


def get_url(site: dict, path: str) -> str:
    """Build the full URL for a path on the given site."""
    return utils.get_base_url(site) + quote(path)


def get_files(site: dict, path: str) -> list[str]:
    """List the entries under a path on the given site."""
    url = get_url(site, path)
    return get_files_by_url(url)


def get_files_by_url(url: str) -> list[str]:
    """Fetch a directory-listing page and return its decoded link targets."""
    # A bare parent-directory link means there is nothing to list.
    if url in ["/../", "../"]:
        return []

    response = requests.get(url)
    if response.status_code != 200:
        print("connection:", response.reason)
        print(url)
        sys.exit(1)

    # Collect every anchor's href, percent-decoded back to a plain name.
    soup = BeautifulSoup(response.text, "html.parser")
    return [unquote(element["href"]) for element in soup.find_all("a")]


def get_uri(url: str) -> str:
    """Return the path shown in a listing page's <h1> heading."""
    if url in ["/../", "../"]:
        return ""
    try:
        response = requests.get(url)
        if response.status_code != 200:
            print("connection:", response.reason)
            sys.exit(1)
        soup = BeautifulSoup(response.text, "html.parser")
        # Drop the first nine characters of the heading, presumably an
        # "Index of " prefix on directory-listing pages.
        return soup.find("h1").text[9:]
    except (requests.RequestException, AttributeError):
        # Catch only network failures and a missing <h1>, so the
        # sys.exit(1) above still propagates.
        return ""
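

# A minimal usage sketch, assuming a `site` dict in whatever shape
# utils.get_base_url() expects (that module is not shown here) and a
# server that publishes HTML directory listings. The dict key and URL
# below are hypothetical.
if __name__ == "__main__":
    site = {"base_url": "http://example.com"}  # hypothetical shape
    for name in get_files(site, "/pub/"):
        print(name)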