Create Other Builds/Counter/1a1a.py
Other Builds/Counter/1a1a.py  +238 -0
Other Builds/Counter/1a1a.py
ADDED
@@ -0,0 +1,238 @@
import requests
from bs4 import BeautifulSoup
import re
import time
import threading
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service

url = "https://www.deviantart.com/amber2024/gallery"

def get_values(url):
    """Scrape the favourites, comments, and views counts from a deviation page."""
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    spans = soup.findAll('span', class_="_3AClx")
    favs = 0
    comments = 0
    views = 0
    #print(spans)
    # Iterate over each stat span in the list
    c = 0
    for span in spans:
        # Extract the numeric value and the unit (Favourites, Comments, Views).
        print('\n' + str(list(span)) + str(c) + '\n')
        # Character-set strips: these peel markup characters off the raw span
        # text, which is why leftovers like 'Favourites<' are matched below
        value = str(list(span)[0]).strip('</span>')
        unit = str(list(span)[2]).lstrip('abcdeghijklmnop qrstuvwxyz_1234567890N"=></').rstrip('/span>')
        #print(value)
        # Convert value to numeric format (e.g. '1.2K' -> 1200.0)
        if 'K' in value:
            value = float(value[:-1]) * 1000
        else:
            #print(str(len(value))+'val'+value)
            value = int(value)
        print(unit)

        # Check the unit and update the corresponding counter
        if unit == 'Favourites<' or unit == 'Favourite':
            favs = value
        elif unit == 'Comments<' or unit == 'Comment<':
            comments = value
        elif unit == 'Views<' or unit == 'View':
            views = value
        c += 1
    #print(favs, comments, views)
    return (favs, comments, views)

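# NOTE (sketch, not part of the original flow): str.strip('</span>') removes
# *characters*, not the substring, which is why the unit comparisons above must
# match leftovers such as 'Favourites<'. A more robust approach would read the
# rendered text of each span instead of its raw markup. parse_count below is a
# hypothetical helper and is not called anywhere in this script.
def parse_count(text):
    """Convert a DeviantArt-style count like '1.2K' or '87' to a number."""
    text = text.strip()
    if text.endswith('K'):
        return float(text[:-1]) * 1000
    return int(text)

# Example: parse_count(span.get_text().split()[0]) would yield the numeric
# part of a stat span rendered as '1.2K Favourites'.
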
def get_tags(url):
    """Scrape the tag names attached to a deviation page."""
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    spans = soup.findAll('span', class_="_1nwad")
    tags = []
    #print(spans)
    # Pull the text node out of each tag span's raw markup
    for span in spans:
        tags.append(str(span).split('>')[1].split('<')[0])
    print(tags, spans)
    return tags


def get_links(url, page=1):
    """Render a gallery page with Selenium (to trigger infinite scroll) and return every link on it."""
    service = Service('/Users/osmond/Downloads/chromedriver-mac-arm64/chromedriver')  # Path to chromedriver executable
    driver = webdriver.Chrome(service=service)
    driver.get(url + '?page=' + str(page))

    # Scroll to the bottom of the page until the height stops growing
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)  # Give lazy-loaded content time to render; a sleep of 0 exits before anything loads. Adjust as needed
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break

        last_height = new_height

    soup = BeautifulSoup(driver.page_source, 'html.parser')
    links = []
    for link in soup.find_all('a'):
        links.append(link.get('href'))

    driver.quit()  # Close the browser so each call doesn't leak a Chrome process
    return links


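# NOTE (sketch, not called anywhere): the chromedriver path above is
# machine-specific. With a matching driver on PATH, Selenium 4.6+ can locate
# it automatically, and headless mode avoids opening a visible browser window
# for every page fetch. A hypothetical drop-in constructor for get_links:
def make_headless_driver():
    options = webdriver.ChromeOptions()
    options.add_argument('--headless=new')  # new-style headless mode (Chrome 109+)
    return webdriver.Chrome(options=options)
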
drawings = []
names = []

def recursion(url):
    """Collect every deviation link, following gallery folders and pagination."""
    global drawings, names
    recur = []
    cecant = get_links(url)  # tmp
    secant = False  # True once an "All deviations" folder link is found
    cocecant = 1    # highest page number seen
    cosecant = []   # page numbers collected from '?page=' links

    for i in cecant:
        if '/all' in i and not '/all?' in i:
            secant = True
            recur.append(i)
        if '?page=' in i:
            cosecant.append(int(i.split('?page=')[1]))
    print(cosecant, 'cosecant')
    recur = list(set(recur))

    try:
        cocecant = max(cosecant)
    except ValueError:
        print('Only One Page')
    print(cocecant, 'cocecant')
    # No "All" folder found: fall back to crawling the individual gallery folders
    if secant != True:
        for i in cecant:
            if "/gallery/" in i:
                recur.append(i)

    print(recur, 'reccc')
    for j in recur:
        cecant = get_links(j)  # tmp
        secant = False
        cocecant = 1
        cosecant = []

        for i in cecant:
            if '/all' in i and not '/all?' in i:
                secant = True
                recur.append(i)
            if '?page=' in i:
                cosecant.append(int(i.split('?page=')[1]))
        # Rebinding recur here does not extend the loop above, which keeps
        # iterating over the original list object
        recur = list(set(recur))
        print(recur)
        print(cosecant, 'cosc')
        try:
            cocecant = max(cosecant)
        except ValueError:
            print('Only One Page')

        for z in range(1, cocecant + 1):
            print(z)
            x = get_links(j, page=z)

            flag = False   # True if this page contains any deviation links
            alled = False  # If there is a folder for All Deviations
            for k in x:
                if '/art' in k:
                    flag = True
                    break

            if flag == True:
                print(x, 'xxxxxxxxx')
                for c in x:
                    if "/art/" in c and not "#comments" in c and not c in drawings:
                        drawings.append(c)
                        names.append(c.split('/art/')[1])
            else:
                # A page with no deviation links means the folder is exhausted
                break

    drawings = list(set(drawings))

#print(get_links(url))
recursion(url)
#print(drawings)

finalle = []
names = []  # reset; re-populated alongside finalle by process_item below

def recur_works():
    """Sequential fallback: fetch stats for every drawing one at a time."""
    global finalle
    for i in drawings:
        finalle.append(get_values(i))

drawings = list(set(drawings))
tag_sets = []

# Process one item from the drawings list: its stats, name, and tags are
# appended to three parallel lists
def process_item(item):
    global tag_sets
    finalle.append(get_values(item))
    names.append(item.split('/art/')[1])
    tag_sets.append(get_tags(item))

# Divide the drawings into chunks for each thread
num_threads = 1
chunk_size = len(drawings) // num_threads if len(drawings) % num_threads == 0 else len(drawings) // num_threads + 1
chunks = [drawings[i:i + chunk_size] for i in range(0, len(drawings), chunk_size)]

# Create and start worker threads. Caveat: this starts one thread per drawing
# regardless of num_threads, and concurrent appends to the three lists can
# interleave, so finalle[c] and tag_sets[c] may not describe the same item.
threads = []
for chunk in chunks:
    for drawing in chunk:
        # Create a new thread for each item (or group them per chunk as needed)
        t = threading.Thread(target=process_item, args=(drawing,))
        threads.append(t)
        t.start()

# Wait for all threads to complete
for t in threads:
    t.join()

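# NOTE (sketch, not wired in): a bounded thread pool would fix the index-
# alignment caveat above, since executor.map returns results in submission
# order, keeping stats, names, and tags in step per drawing:
#
#     from concurrent.futures import ThreadPoolExecutor
#
#     def fetch_all(item):
#         return get_values(item), item.split('/art/')[1], get_tags(item)
#
#     with ThreadPoolExecutor(max_workers=4) as executor:
#         results = list(executor.map(fetch_all, drawings))
#     finalle = [r[0] for r in results]
#     names = [r[1] for r in results]
#     tag_sets = [r[2] for r in results]
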

def get_summation():
    """Print the totals across every scraped deviation."""
    print(finalle)
    favs = 0
    comm = 0
    view = 0
    for i in finalle:
        if i != False:
            favs += i[0]
            comm += i[1]
            view += i[2]
    print('favs:', favs, 'comm:', comm, 'view:', view, 'names:', names)

def get_tag_summation():
    """Aggregate per-tag totals as [tag, (favs, comments, views), count]."""
    post_processed_tags = []
    indexx = []  # parallel list of tags already seen, for index lookups
    for c in range(len(tag_sets)):
        i = tag_sets[c]
        for j in i:
            if j in indexx:
                # Tag seen before: convert its stats tuple to a list so it can
                # be mutated, bump the count, and add this deviation's stats
                post_processed_tags[indexx.index(j)][1] = list(post_processed_tags[indexx.index(j)][1])
                post_processed_tags[indexx.index(j)][2] += 1
                post_processed_tags[indexx.index(j)][1][0] += finalle[c][0]
                post_processed_tags[indexx.index(j)][1][1] += finalle[c][1]
                post_processed_tags[indexx.index(j)][1][2] += finalle[c][2]

            else:
                post_processed_tags.append([j, finalle[c], 1])
                indexx.append(j)
    return post_processed_tags

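# NOTE (sketch, not wired in): the same aggregation reads more directly with a
# dict keyed by tag, avoiding the parallel indexx list and the tuple-to-list
# juggling. This assumes finalle and tag_sets are index-aligned per drawing
# (as with the ordered pool sketch above):
#
#     from collections import defaultdict
#
#     def tag_totals():
#         totals = defaultdict(lambda: [0, 0, 0, 0])  # favs, comments, views, count
#         for stats, tags in zip(finalle, tag_sets):
#             for tag in tags:
#                 totals[tag][0] += stats[0]
#                 totals[tag][1] += stats[1]
#                 totals[tag][2] += stats[2]
#                 totals[tag][3] += 1
#         return dict(totals)
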
#recur_works()
get_summation()
e = get_tag_summation()

print(e)