"""

pass your download folder as the first argument when running the script from the command line

it'll prompt for the gallery id number (the [usually] six digits at the end of the url)

and download, then wait for another number. Enter 0 to exit. As long as you keep the

gallery number and parentheses--e.g., "(195489)"--at the end of a folder name, you can

rename the folder to whatever you want and this script will recognize that it's already

been downloaded. If there are spaces in the paths, be sure to put the paths in quotes.

E.g., use "D:/Porn/hentai manga", NOT D:/Porn/hentai manga

Example command: (interactive mode for downloading multiple galleries)

python nhentai.py D:/Porn/Hentai

Alternatively: (script mode, downloading a single gallery)

python nhentai.py D:/Porn/Hentai 195489

Or: (script mode, downloading multiple galleries; list as many gallery numbers as you want)

python nhentai.py D:/Porn/Hentai 195489 304405 306322 256248

Requirements:

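Python 3.8 or newer

requests

beautiful soup (bs4), with the lxml parser installed

re (part of the standard library, nothing to install)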

goal: just download some porn

"""

import requests

import os , sys

import bs4

import re

# requests applies no timeout by default, so a stalled connection would
# otherwise block the script forever.
_REQUEST_TIMEOUT = 30  # seconds

# Extensions probed, in order, when the guessed extension returns a 404.
_IMAGE_EXTENSIONS = ('.jpg', '.png', '.gif', '.jpeg', '.bmp')


def _gallery_name_from_title(page_title, separator='»'):
    """Return the gallery name taken from the page title, with special characters removed."""
    sep_index = page_title.find(separator)
    if sep_index >= 0:
        # Keep the text before the separator, minus the single character
        # (presumably a space) that directly precedes it.
        page_title = page_title[:max(sep_index - 1, 0)]
    # When the separator is absent the whole title is kept; slicing with the
    # -1 returned by find() would silently chop off the last two characters.
    return ''.join(ch for ch in page_title if ch.isalnum() or ch in " '")


def _media_id_from_thumbnail(thumbnail_url):
    """Return the path segment that follows ``/galleries/`` in a thumbnail URL."""
    match = re.search(r'/galleries/([^/]+)/', thumbnail_url or '')
    if match is None:
        raise ValueError(f'unexpected thumbnail URL: {thumbnail_url}')
    return match.group(1)


def _page_count(html):
    """Return the number found in the first ``<digits> pages`` text of the gallery page."""
    match = re.search(r'(\d+) pages', html)
    if match is None:
        raise ValueError('could not find the page count in the gallery page')
    return int(match.group(1))


def get_gallery(id_num, folder):
    """Download every page of an nhentai gallery into a new sub-folder of *folder*.

    The sub-folder is named ``"<cleaned title> (<id_num>)"``. Nothing is
    downloaded when ``check_gallery_exists`` reports that the gallery is
    already present. Individual pages that cannot be fetched or written are
    reported on stdout and skipped so the rest of the gallery still downloads.

    Args:
        id_num: Gallery id (the number at the end of the gallery URL).
        folder: Download root; it is concatenated with the sub-folder name,
            so it is expected to end with ``/``.

    Raises:
        requests.exceptions.RequestException: if the gallery page itself
            cannot be fetched (including a non-2xx status).
        ValueError: if the page count or the thumbnail URL cannot be parsed.
    """
    if check_gallery_exists(id_num, folder):
        print(f'That gallery already exists: {id_num}')
        return

    gallery_url = f'https://nhentai.net/g/{id_num}/'
    comic_page = requests.get(gallery_url, timeout=_REQUEST_TIMEOUT)
    comic_page.raise_for_status()
    soup = bs4.BeautifulSoup(comic_page.text, 'lxml')

    # Parse everything needed from the page BEFORE creating the folder: an
    # empty "(id)" folder left behind by a parse failure would make the
    # gallery look already downloaded on the next run.
    page_title = soup.title.string if soup.title is not None else None
    comic_name = f'{_gallery_name_from_title(page_title or "")} ({id_num})'
    thumbnail_url = soup.find(id="thumbnail-container").find(class_='lazyload').get('data-src')
    # Base of the URL of the individual full-size images.
    base_url = 'https://i.nhentai.net/galleries/' + _media_id_from_thumbnail(thumbnail_url) + '/'
    num_items = _page_count(comic_page.text)
    num_len = len(str(num_items))  # width used to right-align the progress output

    folder += comic_name + '/'
    print(folder)
    os.makedirs(folder, exist_ok=True)  # make the comic's folder

    # Current best guess for the image extension; updated whenever a
    # fallback succeeds so later pages try the working extension first.
    ext = _IMAGE_EXTENSIONS[0]
    for page in range(1, num_items + 1):
        name = f'{page}{ext}'
        print(f'downloading: {" " * (num_len - len(str(page)))}{name} / {num_items}')
        filename = folder + name
        if os.path.isfile(filename):
            print("Error, '" + filename + "' already exists, skipping")
            continue

        try:
            url = base_url + name
            image = requests.get(url, timeout=_REQUEST_TIMEOUT)
            if image.status_code == requests.codes.not_found:
                # The guessed extension is wrong for this page: try the others
                # (the one that just failed is not requested a second time).
                for candidate in _IMAGE_EXTENSIONS:
                    if candidate == ext:
                        continue
                    print("trying: " + candidate)
                    url = f'{base_url}{page}{candidate}'
                    image = requests.get(url, timeout=_REQUEST_TIMEOUT)
                    if image.status_code == requests.codes.ok:
                        ext = candidate
                        filename = f'{folder}{page}{candidate}'
                        break

            if image.status_code != requests.codes.ok:
                raise requests.exceptions.HTTPError(
                    f'status {image.status_code} for {url}'
                )

            with open(filename, 'wb') as img:
                for chunk in image.iter_content(100000):
                    img.write(chunk)
        except Exception as e:
            # Deliberately broad: one bad page must not abort the whole gallery.
            print("HTTP Exception: ", e)

def check_gallery_exists(id_num, folder):
    """Return True if *folder* already holds an entry for gallery *id_num*.

    An entry counts as a match when its name ends with the gallery id in
    parentheses, e.g. ``"Some Title (195489)"``, so a downloaded folder can be
    renamed freely as long as that suffix is kept.

    Args:
        id_num: Gallery id, as a string or an int.
        folder: Directory whose direct entries are scanned.

    Returns:
        True if a matching entry exists, False otherwise.

    Raises:
        OSError: if *folder* cannot be listed (for example, it does not exist).
    """
    wanted = str(id_num)
    # Raw string so the backslashes reach the regex engine untouched; any
    # number of digits is accepted so short gallery ids are recognised too.
    id_suffix = re.compile(r'\((\d+)\)$')
    # The context manager closes the directory iterator even on early return.
    with os.scandir(folder) as entries:
        for entry in entries:
            if (match := id_suffix.search(entry.name)) and match.group(1) == wanted:
                return True
    return False

def _download_and_report(gallery_id, folder):
    """Download one gallery, printing any failure instead of propagating it."""
    try:
        get_gallery(gallery_id, folder)
    except Exception as e:
        # Top-level boundary: report the problem and let the caller carry on
        # with the next gallery.
        print(f'Error: {e}')


def _main(argv):
    """Run the downloader in script mode or interactive mode.

    ``argv[1]`` is the download folder. Any further arguments are gallery ids
    that are downloaded one after another (script mode). With no gallery ids
    the user is prompted for ids until ``0`` is entered (interactive mode).
    """
    if len(argv) < 2:
        print('Error: no download folder given')
        return

    # Normalise to forward slashes first so the trailing-slash check does not
    # depend on which path separator the current platform recognises.
    folder = argv[1].replace('\\', '/')
    if folder and not folder.endswith('/'):
        folder += '/'

    galleries = argv[2:]
    if galleries:
        # Script mode: a failing gallery is reported and the rest still run;
        # the script never falls through to the interactive prompt.
        for gallery_id in galleries:
            _download_and_report(gallery_id, folder)
        return

    print('No galleries specified')
    while True:
        try:
            gallery_num = input('Enter the ID number of the nhentai gallery(0 to quit): ').strip()
        except EOFError:
            # Input stream closed (e.g. piped input ran out): stop prompting.
            break
        if gallery_num == '0':
            break
        if not gallery_num:
            continue  # nothing entered; ask again
        _download_and_report(gallery_num, folder)


if __name__ == '__main__':
    _main(sys.argv)