File size: 39,697 Bytes
1ff2605 3ef8fed 1ff2605 dee4c6e 3ef8fed 036a6e1 1ff2605 3ef8fed afab07a dee4c6e 484d898 dee4c6e afab07a dee4c6e afab07a dee4c6e 3203abb dee4c6e 1ff2605 3ef8fed 1ff2605 3ef8fed 1ff2605 3ef8fed 63bdb43 3ef8fed 63bdb43 3ef8fed 63bdb43 3ef8fed 63bdb43 3ef8fed 63bdb43 3203abb 63bdb43 1ff2605 3ef8fed 1ff2605 3ef8fed 1ff2605 3ef8fed 1ff2605 3ef8fed 1ff2605 3ef8fed 1ff2605 3ef8fed 1ff2605 63bdb43 1ff2605 3ef8fed 1ff2605 dee4c6e 1ff2605 036a6e1 dee4c6e 1ff2605 dee4c6e 1ff2605 3203abb 1ff2605 63bdb43 1ff2605 3203abb 1ff2605 dee4c6e 1ff2605 036a6e1 3203abb dee4c6e 1ff2605 63bdb43 3ef8fed 63bdb43 036a6e1 63bdb43 3ef8fed 036a6e1 63bdb43 036a6e1 3203abb 63bdb43 3ef8fed 63bdb43 036a6e1 63bdb43 3ef8fed 63bdb43 036a6e1 63bdb43 3ef8fed 036a6e1 63bdb43 036a6e1 3203abb 96620a1 3ef8fed 036a6e1 63bdb43 3ef8fed 63bdb43 036a6e1 96620a1 63bdb43 1ff2605 dee4c6e 1ff2605 dee4c6e 1ff2605 dee4c6e 036a6e1 3ef8fed 63bdb43 1ff2605 036a6e1 3203abb 1ff2605 036a6e1 1ff2605 dee4c6e 3ef8fed 1ff2605 3ef8fed 1ff2605 3203abb 1ff2605 3ef8fed dee4c6e 1ff2605 3ef8fed 1ff2605 036a6e1 3203abb 1ff2605 7a6e7ae 3ef8fed 1ff2605 158a13b 1ff2605 3ef8fed 1ff2605 dee4c6e 1ff2605 3ef8fed 1ff2605 3ef8fed 1ff2605 dee4c6e 1ff2605 dee4c6e 1ff2605 7a6e7ae 036a6e1 1ff2605 dee4c6e 1ff2605 7a6e7ae 1ff2605 036a6e1 1ff2605 3ef8fed 1ff2605 3ef8fed 1ff2605 3ef8fed dee4c6e 3ef8fed 1ff2605 7a6e7ae dee4c6e 1ff2605 3ef8fed dee4c6e 1ff2605 dee4c6e 1ff2605 63bdb43 1ff2605 3ef8fed 1ff2605 63bdb43 1ff2605 3ef8fed dee4c6e 1ff2605 dee4c6e 1ff2605 3ef8fed 1ff2605 3ef8fed 1ff2605 3ef8fed 1ff2605 3ef8fed 1ff2605 3ef8fed dee4c6e 1ff2605 3203abb 3ef8fed dee4c6e 1ff2605 dee4c6e 1ff2605 3ef8fed 1ff2605 3ef8fed dee4c6e 1ff2605 3ef8fed 1ff2605 63bdb43 1ff2605 3ef8fed 1ff2605 dee4c6e 1ff2605 dee4c6e 1ff2605 dee4c6e 1ff2605 3ef8fed dee4c6e 1ff2605 dee4c6e 1ff2605 dee4c6e 1ff2605 3ef8fed 1ff2605 3ef8fed 1ff2605 dee4c6e 63bdb43 1ff2605 63bdb43 1ff2605 dee4c6e 1ff2605 dee4c6e 1ff2605 3ef8fed afab07a 1ff2605 afab07a dee4c6e afab07a dee4c6e 3ef8fed dee4c6e 
afab07a dee4c6e 3ef8fed afab07a 3ef8fed afab07a dee4c6e 3ef8fed dee4c6e 3ef8fed dee4c6e 1ff2605 afab07a dee4c6e 63bdb43 afab07a 3ef8fed afab07a 63bdb43 afab07a 3ef8fed afab07a dee4c6e afab07a dee4c6e afab07a dee4c6e afab07a 63bdb43 dee4c6e afab07a 3ef8fed afab07a 3ef8fed afab07a 63bdb43 3ef8fed 1ff2605 63bdb43 dee4c6e 63bdb43 1ff2605 3ef8fed 1ff2605 dee4c6e 036a6e1 3ef8fed 1ff2605 afab07a dee4c6e afab07a 1ff2605 dee4c6e 1ff2605 3ef8fed 1ff2605 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 |
const { randomUUID } = require('crypto');
const { EventEmitter } = require('events');
const cors = require('cors');
const express = require('express');
const puppeteerExtra = require('puppeteer-extra'); // NEW: For stealth
const StealthPlugin = require('puppeteer-extra-plugin-stealth'); // NEW: Stealth plugin
// Register the stealth plugin on the shared puppeteer-extra instance before any browser is launched.
puppeteerExtra.use(StealthPlugin());
// Express application; `port` is presumably handed to app.listen further down the file (not visible here).
const app = express();
const port = 7860;
// cors() is installed with its default options; express.json() parses JSON request bodies into req.body.
app.use(cors());
app.use(express.json());
// --- Progress Tracking and Job Storage ---
// sessionId -> live ProgressTracker for jobs that are still running.
const progressTrackers = new Map();
// sessionId -> job record: { status: 'processing' }, { status: 'completed', buffer } or { status: 'error', message }.
const downloadJobs = new Map();
/**
 * Holds the latest progress state of one download job and broadcasts changes.
 *
 * Every call to updateProgress emits a `progress` event whose payload is
 * `{ sessionId, progress, status, message, timestamp }`.
 */
class ProgressTracker extends EventEmitter {
  /**
   * @param {string} sessionId - Identifier of the job this tracker reports on.
   */
  constructor(sessionId) {
    super();
    this.sessionId = sessionId;
    this.progress = 0;
    this.status = 'initializing';
    this.message = '';
  }

  /**
   * Stores the new state, emits it as a `progress` event and logs one line.
   *
   * @param {number} progress - Percentage value (callers pass -1 to signal an error).
   * @param {string} status - Short machine-readable phase name.
   * @param {string} message - Human-readable description of the phase.
   * @returns {void}
   */
  updateProgress(progress, status, message) {
    this.progress = progress;
    this.status = status;
    this.message = message;
    const update = {
      sessionId: this.sessionId,
      progress,
      status,
      message,
      timestamp: new Date().toISOString()
    };
    this.emit('progress', update);
    console.log(`📊 [${this.sessionId}] ${progress}% - ${status}: ${message}`);
  }
}
// --- Enhanced Human Behavior Simulation ---
/**
 * Moves the mouse to five random positions inside the viewport, scrolls the
 * page by a random offset in [-100, 100) px and pauses between the actions.
 *
 * @param {object} page - Puppeteer page to act on.
 * @param {ProgressTracker} [progressTracker] - Optional tracker; its progress is bumped by 1 when done.
 * @returns {Promise<void>}
 */
const simulateHumanBehavior = async (page, progressTracker) => {
  // page.waitForTimeout no longer exists in newer Puppeteer releases; fall
  // back to a plain timer when the method is missing.
  const pause = (ms) =>
    typeof page.waitForTimeout === 'function'
      ? page.waitForTimeout(ms)
      : new Promise((resolve) => setTimeout(resolve, ms));

  console.log("🧠 Simulating human-like mouse movements and delays...");
  // page.viewport() returns null when no viewport is set; use the 1920x1080
  // defaults below in that case instead of throwing on `.width`.
  const viewport = page.viewport() ?? {};
  for (let i = 0; i < 5; i++) {
    const x = Math.random() * (viewport.width || 1920);
    const y = Math.random() * (viewport.height || 1080);
    await page.mouse.move(x, y, { steps: 10 });
    await pause(Math.random() * 1000 + 500);
  }
  // Random scroll a bit
  await page.evaluate(() => {
    window.scrollBy(0, Math.random() * 200 - 100);
  });
  await pause(Math.random() * 2000 + 1000);
  progressTracker?.updateProgress(progressTracker.progress + 1, 'humanizing', 'Human behavior simulated');
};
// --- Enhanced Cloudflare Bypass Function ---
/**
 * Checks `page` for a Cloudflare interstitial and, when one is found, waits
 * for it to disappear.
 *
 * Flow: probe each known selector in order (5 s each, so up to 40 s when
 * nothing matches) -> on a match run simulateHumanBehavior and wait up to
 * 90 s for every challenge marker to leave the DOM -> if that times out,
 * click the Turnstile widget when present and wait up to 60 s more -> pause
 * 3-7 s so the page can settle.
 *
 * @param {object} page - Puppeteer page to inspect.
 * @param {ProgressTracker} [progressTracker] - Optional tracker; receives 35 on entry and 38 once a challenge has cleared.
 * @returns {Promise<void>}
 * @throws {Error} When the fallback (Turnstile click plus second wait) fails.
 */
const handleCloudflareChallenge = async (page, progressTracker) => {
  // page.waitForTimeout no longer exists in newer Puppeteer releases; fall
  // back to a plain timer when the method is missing.
  const pause = (ms) =>
    typeof page.waitForTimeout === 'function'
      ? page.waitForTimeout(ms)
      : new Promise((resolve) => setTimeout(resolve, ms));

  progressTracker?.updateProgress(35, 'cloudflare', 'Detecting and bypassing Cloudflare...');
  console.log("☁️ Checking for Cloudflare challenge...");

  // Selectors probed (in this order) to decide whether a challenge is showing.
  const cloudflareSelectors = [
    '#challenge-running',
    '.cf-browser-verification',
    '[data-ray]',
    '#cf-challenge-running',
    '.under-attack',
    'iframe[src*="cloudflare"]',
    '#challenge-form', // JS challenge
    '.cf-turnstile' // Turnstile CAPTCHA
  ];
  // Markers that must all be gone before the challenge counts as resolved.
  // The iframe selector is only used for detection, not for the wait.
  const resolutionSelectors = cloudflareSelectors.filter((sel) => !sel.startsWith('iframe'));
  // Runs in the page: true once none of the given selectors matches anything.
  const challengeCleared = (selectors) => !selectors.some((sel) => document.querySelector(sel));

  let challengeDetected = false;
  for (const selector of cloudflareSelectors) {
    try {
      await page.waitForSelector(selector, { timeout: 5000 });
      challengeDetected = true;
      console.log(`☁️ Cloudflare challenge detected with selector: ${selector}`);
      break;
    } catch (e) {
      // Selector did not show up within its timeout; try the next one.
    }
  }

  if (!challengeDetected) {
    console.log("✅ No Cloudflare challenge detected.");
    return;
  }

  // Simulate human behavior before waiting for the challenge to resolve
  await simulateHumanBehavior(page, progressTracker);
  console.log("⏳ Waiting for Cloudflare challenge to complete...");
  try {
    await page.waitForFunction(challengeCleared, { timeout: 90000 }, resolutionSelectors);
  } catch (e) {
    console.log("⚠️ Standard wait failed, attempting Turnstile click...");
    // Fallback: click the Turnstile widget if present, then wait again
    try {
      const cfInput = await page.$('[name="cf-turnstile-response"]');
      if (cfInput) {
        const parentItem = await cfInput.evaluateHandle((element) => element.parentElement);
        const coordinates = await parentItem.boundingBox();
        if (coordinates) {
          await page.mouse.click(coordinates.x + 25, coordinates.y + coordinates.height / 2);
          console.log("🖱️ Clicked on Turnstile CAPTCHA");
          await pause(3000);
        }
      }
      await page.waitForFunction(challengeCleared, { timeout: 60000 }, resolutionSelectors);
    } catch (clickError) {
      console.error("❌ Turnstile click failed:", clickError.message);
      throw new Error("Failed to bypass Cloudflare challenge. Try again later or use a proxy.");
    }
  }

  // Additional wait for page to stabilize post-challenge with random delay
  const randomDelay = (min, max) => Math.floor(Math.random() * (max - min + 1) + min);
  await pause(randomDelay(3000, 7000));
  console.log("✅ Cloudflare challenge bypassed successfully.");
  progressTracker?.updateProgress(38, 'cloudflare', 'Cloudflare bypassed');
};
// --- Puppeteer Logic ---
/**
 * Prepares `page` so cookie-consent UI is suppressed: sets consent cookies
 * for .studocu.com, adds a style tag with hiding rules to the current
 * document, and registers a script that runs on every new document.
 *
 * @param {object} page - Puppeteer page to configure.
 * @param {ProgressTracker} [progressTracker] - Optional tracker; receives 5 on entry and 10 on completion.
 * @returns {Promise<boolean>} Always resolves to true.
 */
const bypassCookiesAndRestrictions = async (page, progressTracker) => {
  progressTracker?.updateProgress(5, 'bypassing', 'Setting up cookie bypass...');
  console.log("🍪 Starting comprehensive cookie and restriction bypass...");

  // Step 1: Set cookies before page load
  const preCookies = [
    { name: 'cookieConsent', value: 'accepted', domain: '.studocu.com' },
    { name: 'cookie_consent', value: 'true', domain: '.studocu.com' },
    { name: 'gdpr_consent', value: 'accepted', domain: '.studocu.com' },
    { name: 'privacy_policy_accepted', value: 'true', domain: '.studocu.com' },
    { name: 'user_consent', value: '1', domain: '.studocu.com' },
    { name: 'analytics_consent', value: 'false', domain: '.studocu.com' },
    { name: 'marketing_consent', value: 'false', domain: '.studocu.com' },
    { name: 'functional_consent', value: 'true', domain: '.studocu.com' },
  ];
  // Best effort: a cookie that cannot be set is logged and skipped.
  for (const cookie of preCookies) {
    try {
      await page.setCookie(cookie);
    } catch (e) {
      console.log(`Failed to set cookie ${cookie.name}:`, e.message);
    }
  }

  // Step 2: Inject CSS to hide cookie banners immediately
  // NOTE(review): the only visible caller runs this before page.goto, so the
  // tag is added to whatever document is loaded at that moment — confirm it
  // still applies after navigation.
  // NOTE(review): `:has-text()` is not a standard CSS pseudo-class; verify
  // the browser does not discard the whole first rule because of it.
  await page.addStyleTag({
    content: `
      /* Hide all possible cookie banners */
      [id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i], [aria-label*="cookie" i],
      .gdpr-banner, .gdpr-popup, .gdpr-modal, .consent-banner, .consent-popup, .consent-modal, .privacy-banner, .privacy-popup, .privacy-modal,
      .cookie-law, .cookie-policy, .cookie-compliance, .onetrust-banner-sdk, #onetrust-consent-sdk, .cmp-banner, .cmp-popup, .cmp-modal,
      [class*="CookieBanner"], [class*="CookieNotice"], [class*="ConsentBanner"], [class*="ConsentManager"], .cc-banner, .cc-window, .cc-compliance,
      div[style*="position: fixed"]:has-text("cookie"), div[style*="position: fixed"]:has-text("consent"), .fixed:has-text("cookie"), .fixed:has-text("consent") {
        display: none !important;
        visibility: hidden !important;
        opacity: 0 !important;
        z-index: -9999 !important;
        pointer-events: none !important;
      }
      /* Remove blur and premium overlays, including previews */
      [class*="blur" i], [class*="premium" i], [class*="paywall" i], [class*="sample-preview-blur" i], [class*="preview" i], [class*="blurred-container" i], [class*="blurred" i] {
        display: none !important;
        filter: none !important;
        backdrop-filter: none !important;
        opacity: 1 !important;
        visibility: visible !important;
      }
      /* Ensure document content is visible */
      .document-content, .page-content, [data-page] {
        filter: none !important;
        opacity: 1 !important;
        visibility: visible !important;
        pointer-events: auto !important;
      }
      /* Remove fixed overlays */
      .fixed-overlay, .sticky-overlay, .content-overlay {
        display: none !important;
      }
      /* Restore scrolling */
      html, body {
        overflow: auto !important;
        position: static !important;
      }
      /* Hide Cloudflare elements if they persist */
      #challenge-running, .cf-browser-verification, [data-ray], .under-attack {
        display: none !important;
      }
    `
  });

  // Step 3: Inject JavaScript to handle dynamic cookie banners
  await page.evaluateOnNewDocument(() => {
    // Override common cookie consent functions
    window.cookieConsent = { accepted: true };
    window.gtag = () => { };
    window.ga = () => { };
    window.dataLayer = [];
    // Mutation observer to catch dynamically added cookie banners
    const observer = new MutationObserver((mutations) => {
      mutations.forEach((mutation) => {
        mutation.addedNodes.forEach((node) => {
          if (node.nodeType === 1) { // Element node
            const element = node;
            const text = element.textContent || '';
            const className = element.className || '';
            const id = element.id || '';
            // Check if this looks like a cookie banner
            if (
              text.toLowerCase().includes('cookie') ||
              text.toLowerCase().includes('consent') ||
              text.toLowerCase().includes('privacy policy') ||
              className.toLowerCase().includes('cookie') ||
              className.toLowerCase().includes('consent') ||
              className.toLowerCase().includes('gdpr') ||
              id.toLowerCase().includes('cookie') ||
              id.toLowerCase().includes('consent')
            ) {
              console.log('Removing detected cookie banner:', element);
              element.remove();
            }
          }
        });
      });
    });
    // NOTE(review): this script runs when the document is created, at which
    // point document.body is presumably still null; observe() would then
    // throw and the interval below would never be installed — verify.
    observer.observe(document.body, { childList: true, subtree: true });
    // Set up periodic cleanup
    setInterval(() => {
      const cookieElements = document.querySelectorAll(`
        [id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i],
        .gdpr-banner, .consent-banner, .privacy-banner, .onetrust-banner-sdk, #onetrust-consent-sdk,
        .cmp-banner, .cc-banner
      `);
      cookieElements.forEach(el => el.remove());
      // Restore body scroll
      document.body.style.overflow = 'auto';
      document.documentElement.style.overflow = 'auto';
    }, 1000);
  });

  progressTracker?.updateProgress(10, 'bypassing', 'Cookie bypass configured successfully');
  return true;
};
/**
 * Runs a clean-up routine inside the page that removes ad, premium-banner and
 * preview/blur elements and rewrites inline styles on content containers.
 * The routine runs once immediately and then every second for 30 seconds.
 *
 * @param {object} page - Puppeteer page whose document is modified.
 * @param {ProgressTracker} [progressTracker] - Optional tracker; receives 15 on entry and 20 on completion.
 * @returns {Promise<void>} Resolves once the in-page routine has been started.
 */
const unblurContent = async (page, progressTracker) => {
  progressTracker?.updateProgress(15, 'unblurring', 'Removing content restrictions...');
  console.log("🔓 Unblurring content and bypassing premium restrictions...");
  await page.evaluate(() => {
    const removeRestrictions = () => {
      const removeBySelector = (selector) => {
        document.querySelectorAll(selector).forEach(el => el.remove());
      };
      removeBySelector("#adbox, .adsbox, .ad-box, .banner-ads, .advert");
      removeBySelector(".PremiumBannerBlobWrapper_overflow-wrapper__xsaS8");
      // Remove preview and blurred overlays
      removeBySelector('[class*="preview" i], [class*="blurred-container" i], [class*="blurred" i]:not(img)');

      // Visits every element under `element` and assigns inline style values
      // to those that are blurred, translucent or carry a blur/premium class.
      const removeBlur = (element = document) => {
        element.querySelectorAll("*").forEach(el => {
          const style = window.getComputedStyle(el);
          if (
            style.filter?.includes("blur") ||
            style.backdropFilter?.includes("blur") ||
            parseFloat(style.opacity) < 1 ||
            (el.className && el.className.toString().toLowerCase().includes("blur")) ||
            (el.className && el.className.toString().toLowerCase().includes("premium"))
          ) {
            el.style.filter = "none !important";
            el.style.backdropFilter = "none !important";
            el.style.opacity = "1 !important";
            if (el.classList) {
              el.classList.remove("blur", "blurred", "premium-blur");
            }
          }
        });
      };
      removeBlur();

      // Containers expected to hold the document itself are forced visible.
      const contentSelectors = [
        '.document-content', '.page-content', '.content', '[data-page]', '[data-testid*="document"]',
        '[data-testid*="page"]', '.page', '.document-page', 'main', 'article'
      ];
      contentSelectors.forEach(selector => {
        document.querySelectorAll(selector).forEach(el => {
          el.style.setProperty('filter', 'none', 'important');
          el.style.setProperty('opacity', '1', 'important');
          el.style.setProperty('visibility', 'visible', 'important');
          el.style.setProperty('display', 'block', 'important');
          el.style.setProperty('pointer-events', 'auto', 'important');
        });
      });
    };
    removeRestrictions();
    // Keep re-running for 30 s so late-rendered elements are handled too.
    const intervalId = setInterval(removeRestrictions, 1000);
    setTimeout(() => clearInterval(intervalId), 30000);
  });
  progressTracker?.updateProgress(20, 'unblurring', 'Content restrictions removed');
};
/**
 * Rewrites the `src` of every `<img>` whose URL contains "/blurred/" by
 * dropping the first such path segment, then waits for the page's images to
 * finish loading.
 *
 * Each pending image is given at most 10 s (load, error or timeout all count
 * as done); a fixed 3 s pause follows.
 *
 * @param {object} page - Puppeteer page whose images are rewritten.
 * @param {ProgressTracker} [progressTracker] - Optional tracker; receives 65 on entry and 70 on completion.
 * @returns {Promise<void>}
 */
const fetchClearImages = async (page, progressTracker) => {
  progressTracker?.updateProgress(65, 'unblurring_images', 'Fetching clear page images...');
  console.log("🖼️ Modifying blurred image URLs to fetch clear versions...");
  await page.evaluate(() => {
    const images = document.querySelectorAll('img[src*="/blurred/"]');
    images.forEach(img => {
      img.src = img.src.replace(/\/blurred\//, '/');
      console.log(`Modified image src: ${img.src}`);
    });
  });
  // Wait for the modified images to load
  await page.evaluate(async () => {
    const images = Array.from(document.querySelectorAll('img'));
    await Promise.all(images.map(img => {
      if (img.complete) return Promise.resolve();
      return new Promise((resolve) => {
        img.addEventListener('load', resolve);
        img.addEventListener('error', resolve);
        setTimeout(resolve, 10000);
      });
    }));
  });
  await new Promise(resolve => setTimeout(resolve, 3000)); // Additional delay for stability
  progressTracker?.updateProgress(70, 'unblurring_images', 'Clear images loaded');
};
/**
 * Adds a `<style id="print-style-extension">` element to the page's head with
 * A4 print rules: page chrome hidden, backgrounds cleared, document
 * containers and pages stretched to full width.
 *
 * Calling it again replaces the previously injected element instead of
 * stacking a second copy.
 *
 * @param {object} page - Puppeteer page to style.
 * @param {ProgressTracker} [progressTracker] - Optional tracker; receives 85 on entry and 88 on completion.
 * @returns {Promise<void>}
 */
const applyPrintStyles = async (page, progressTracker) => {
  progressTracker?.updateProgress(85, 'styling', 'Applying print styles...');
  console.log("🖨️ Applying print styles for clean PDF...");
  await page.evaluate(() => {
    // Drop a style element left by an earlier call so rules are not duplicated.
    document.getElementById("print-style-extension")?.remove();
    const style = document.createElement("style");
    style.id = "print-style-extension";
    // textContent: the value is plain CSS and never needs to be parsed as HTML.
    style.textContent = `
      @page {
        /* Set page size to A4 and remove default margins */
        size: A4 portrait;
        margin: 0mm;
      }
      @media print {
        html, body {
          /* Ensure the body takes the full width and has no extra padding/margin */
          width: 210mm !important;
          height: auto !important;
          margin: 0 !important;
          padding: 0 !important;
          overflow: visible !important;
          background: white !important;
          color: black !important;
          display: flex;
          justify-content: center;
        }
        /* Remove all unwanted elements like headers, footers, sidebars, etc. */
        header, footer, nav, aside, .no-print, .ads, .sidebar, .premium-banner,
        [class*="Header"], [class*="Footer"], [class*="Sidebar"], [id*="Header"],
        .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho, .Sidebar_sidebar-scrollable__kqeBZ,
        .HeaderWrapper_header-wrapper__mCmf3, .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
        .Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
        .InlineBanner_inline-banner-wrapper__DAi5X, .banner-wrapper, #top-bar-wrapper,
        .Layout_sidebar-wrapper__unavM, .Layout_is-open__9DQr4 {
          display: none !important;
        }
        /* Force all elements to have a transparent background and no shadow */
        * {
          box-shadow: none !important;
          background: transparent !important;
          color: inherit !important;
        }
        /*
         * KEY FIX: Target the main document container.
         * Force it to be a block element, remove any transforms or max-widths,
         * and center it perfectly within the page.
         */
        .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ,
        .Viewer_document-wrapper__XsO4j, .page-content, .document-viewer, #page-container {
          position: static !important;
          display: block !important;
          width: 100% !important;
          max-width: none !important;
          margin: 0 auto !important; /* Center horizontally */
          padding: 0 !important;
          box-sizing: border-box; /* Include padding in width calculation */
          transform: none !important;
        }
        /* Ensure individual pages and images within the document use the full width */
        [data-page], .page, .document-page, img {
          page-break-after: always !important;
          page-break-inside: avoid !important;
          page-break-before: avoid !important;
          width: 100% !important;
          max-width: 100% !important;
          height: auto !important;
          display: block !important;
          margin: 0 !important;
          padding: 0 !important;
        }
      }
    `;
    document.head.appendChild(style);
  });
  progressTracker?.updateProgress(88, 'styling', 'Print styles applied successfully');
};
/**
 * Opens `url` in a headless browser, prepares the page and prints it to PDF.
 *
 * Steps: launch browser -> configure page and request filtering -> optional
 * login -> navigate (up to 3 attempts) -> Cloudflare check -> scroll through
 * the document so lazy content loads -> wait for images -> apply print
 * styles -> page.pdf(). The browser is always closed in `finally`.
 *
 * @param {string} url - Address of the document page to render.
 * @param {{email?: string, password?: string}} [options] - When both fields are set, a login is performed first.
 * @param {ProgressTracker|null} [progressTracker] - Optional tracker updated from 0 to 100, or -1 on failure.
 * @returns {Promise<Buffer>} The generated PDF.
 * @throws {Error} Any launch, login, navigation or rendering failure (after being reported to the tracker).
 */
const studocuDownloader = async (url, options = {}, progressTracker = null) => {
  let browser;
  try {
    progressTracker?.updateProgress(0, 'initializing', 'Starting browser...');
    console.log("🚀 Launching browser with enhanced stealth configuration...");
    browser = await puppeteerExtra.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu',
        '--disable-features=VizDisplayCompositor',
        '--disable-background-networking',
        '--disable-background-timer-throttling',
        '--disable-renderer-backgrounding',
        '--disable-backgrounding-occluded-windows',
        '--disable-ipc-flooding-protection',
        '--disable-web-security',
        '--disable-features=site-per-process',
        '--disable-blink-features=AutomationControlled',
        '--disable-extensions',
        '--ignore-certificate-errors',
        '--disable-features=TranslateUI',
        '--disable-ipc-flooding',
        '--disable-backgrounding-occluded-windows',
        '--disable-renderer-backgrounding',
        '--disable-features=TranslateUI,BlinkGenPropertyTrees',
        '--metrics-recording-only',
        '--no-default-browser-check',
        '--safebrowsing-disable-auto-update',
        '--password-store=basic',
        '--use-mock-keychain'
      ],
      ignoreHTTPSErrors: true,
      timeout: 300000,
    });
    const page = await browser.newPage();
    progressTracker?.updateProgress(2, 'initializing', 'Configuring browser settings...');
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
    await page.setViewport({ width: 1920, height: 1080 });
    // Navigator overrides kept in addition to what the stealth plugin applies.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
      Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
      Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
      Object.defineProperty(navigator, 'permissions', {
        get: () => ({
          query: () => Promise.resolve({ state: 'granted' })
        })
      });
      window.chrome = {
        runtime: {},
        loadTimes: function () { },
        csi: function () { },
        app: {}
      };
    });

    // Set up cookie and content bypass
    await bypassCookiesAndRestrictions(page, progressTracker);

    // Request filtering: documents and Cloudflare traffic always pass; other
    // requests are aborted when they match one of the three groups below.
    await page.setRequestInterception(true);
    const assetTypes = ['image', 'media', 'font', 'stylesheet'];
    const trackerHints = [
      'doubleclick', 'googletagmanager', 'facebook.com', 'twitter.com', 'analytics',
      'gtm', 'hotjar', 'mixpanel', 'onetrust', 'cookielaw'
    ];
    page.on('request', (req) => {
      const resourceType = req.resourceType();
      const reqUrl = req.url().toLowerCase();
      if (resourceType === 'document') {
        req.continue();
        return;
      }
      if (reqUrl.includes('cloudflare') || reqUrl.includes('cf-')) {
        req.continue();
        return;
      }
      // Static assets whose URL mentions none of document/page/studocu.
      const isNonCoreAsset =
        assetTypes.includes(resourceType) &&
        !reqUrl.includes('document') && !reqUrl.includes('page') && !reqUrl.includes('studocu');
      // Scripts from hosts other than studocu/cloudflare.
      const isThirdPartyScript =
        resourceType === 'script' && !reqUrl.includes('studocu') && !reqUrl.includes('cloudflare');
      // Known tracking/consent hosts, plus "other" requests to /track/ paths.
      const isTracker =
        trackerHints.some((hint) => reqUrl.includes(hint)) ||
        (resourceType === 'other' && reqUrl.includes('/track/'));
      if (isNonCoreAsset || isThirdPartyScript || isTracker) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Login if credentials provided
    if (options.email && options.password) {
      progressTracker?.updateProgress(12, 'authenticating', 'Logging into StuDocu...');
      console.log("🔑 Logging in to StuDocu...");
      await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded', timeout: 120000 });
      await handleCloudflareChallenge(page, progressTracker);
      await page.waitForSelector('#email', { timeout: 15000 });
      await page.type('#email', options.email);
      await page.type('#password', options.password);
      try {
        // Start waiting for the navigation before clicking; otherwise a fast
        // navigation can finish before the wait is registered and time out.
        await Promise.all([
          page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 30000 }),
          page.click('button[type="submit"]'),
        ]);
        await page.waitForSelector('.user-profile, [data-testid="user-menu"]', { timeout: 10000 });
        console.log("✅ Login successful.");
        progressTracker?.updateProgress(18, 'authenticated', 'Login successful');
      } catch (e) {
        console.error("❌ Login failed:", e.message);
        throw new Error("Login failed. Check credentials or try again.", { cause: e });
      }
    }

    progressTracker?.updateProgress(30, 'navigating', 'Navigating to document...');
    console.log(`🌐 Navigating to ${url}...`);
    let navigationSuccess = false;
    let attempts = 0;
    const maxAttempts = 3;
    while (!navigationSuccess && attempts < maxAttempts) {
      try {
        attempts++;
        progressTracker?.updateProgress(30 + (attempts * 5), 'navigating', `Navigation attempt ${attempts}/${maxAttempts}`);
        console.log(`Navigation attempt ${attempts}/${maxAttempts}`);
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 120000 });
        navigationSuccess = true;
      } catch (e) {
        console.log(`Navigation attempt ${attempts} failed:`, e.message);
        // The last failure is rethrown; earlier ones wait 10 s and retry.
        if (attempts >= maxAttempts) throw e;
        await new Promise(resolve => setTimeout(resolve, 10000));
      }
    }

    await handleCloudflareChallenge(page, progressTracker);
    progressTracker?.updateProgress(40, 'loading', 'Page loaded, waiting for content...');
    await new Promise(resolve => setTimeout(resolve, 5000));

    // Apply content unblurring
    await unblurContent(page, progressTracker);

    // Wait for document content: the first selector that appears wins.
    progressTracker?.updateProgress(45, 'loading', 'Waiting for document content...');
    console.log("⏳ Waiting for document content to load...");
    const contentSelectors = [
      '.document-content', '.page-content', '[data-page]', '[data-testid*="document"]',
      'img[src*="document"]', 'img[src*="page"]', '.page', 'main img', 'article img'
    ];
    let contentFound = false;
    for (const selector of contentSelectors) {
      try {
        await page.waitForSelector(selector, { timeout: 20000 });
        console.log(`✅ Found content with selector: ${selector}`);
        contentFound = true;
        break;
      } catch (e) {
        console.log(`❌ Selector ${selector} not found, trying next...`);
      }
    }
    if (!contentFound) {
      console.log("⚠️ No specific content selector found, proceeding with page content...");
    }

    // Scroll to the bottom repeatedly until the page height stops growing,
    // so lazily loaded pages are rendered, then return to the top.
    progressTracker?.updateProgress(50, 'scrolling', 'Loading all document pages...');
    console.log("📜 Loading all document pages with enhanced slow scroll...");
    await page.evaluate(async () => {
      const delay = (ms) => new Promise((res) => setTimeout(res, ms));
      let scrollHeight = document.body.scrollHeight;
      while (true) {
        let totalHeight = 0;
        const distance = 600;
        while (totalHeight < scrollHeight) {
          window.scrollBy(0, distance);
          totalHeight += distance;
          await delay(300);
        }
        await delay(2000);
        const newHeight = document.body.scrollHeight;
        if (newHeight === scrollHeight) break;
        scrollHeight = newHeight;
      }
      window.scrollTo({ top: 0, behavior: "smooth" });
      await delay(1000);
    });

    // Re-apply unblur after loading new content
    await unblurContent(page, progressTracker);
    // Fetch clear images for blurred pages
    await fetchClearImages(page, progressTracker);

    // Wait for all images to load (each pending image gets at most 10 s)
    progressTracker?.updateProgress(75, 'loading_images', 'Loading images...');
    console.log("🖼️ Waiting for all images to load...");
    await page.evaluate(async () => {
      const images = Array.from(document.querySelectorAll('img'));
      await Promise.all(images.map(img => {
        if (img.complete) return Promise.resolve();
        return new Promise((resolve) => {
          img.addEventListener('load', resolve);
          img.addEventListener('error', resolve);
          setTimeout(resolve, 10000);
        });
      }));
    });
    await new Promise(resolve => setTimeout(resolve, 5000));
    progressTracker?.updateProgress(80, 'finalizing', 'Preparing document for PDF generation...');

    // Assign the measured document height to body and html.
    await page.evaluate(() => {
      const getDocumentHeight = () => Math.max(
        document.body.scrollHeight, document.body.offsetHeight,
        document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight
      );
      const height = getDocumentHeight();
      document.body.style.height = `${height}px !important`;
      document.documentElement.style.height = `${height}px !important`;
      document.body.style.overflow = 'hidden !important';
    });

    // Content verification: only logs a warning, never aborts.
    const contentCheck = await page.evaluate(() => {
      const textContent = document.body.textContent || '';
      const images = document.querySelectorAll('img');
      const documentImages = Array.from(images).filter(img =>
        img.src.includes('document') || img.src.includes('page') ||
        img.alt.includes('document') || img.alt.includes('page')
      );
      return {
        totalText: textContent.length,
        totalImages: images.length,
        documentImages: documentImages.length,
        hasDocumentContent: documentImages.length > 0 || textContent.length > 1000
      };
    });
    console.log("📊 Content verification:", {
      textLength: contentCheck.totalText,
      images: contentCheck.totalImages,
      documentImages: contentCheck.documentImages,
      hasContent: contentCheck.hasDocumentContent
    });
    if (!contentCheck.hasDocumentContent) {
      console.warn("⚠️ Warning: Limited document content detected.");
    }

    // Apply print styles and generate PDF
    await applyPrintStyles(page, progressTracker);
    await page.emulateMediaType('print');
    progressTracker?.updateProgress(90, 'generating', 'Generating PDF...');
    console.log("📄 Generating PDF...");
    const pdfBuffer = await page.pdf({
      printBackground: true,
      preferCSSPageSize: true, // Use the @page size
      displayHeaderFooter: false,
      timeout: 180000,
      scale: 1,
      omitBackground: false
    });
    progressTracker?.updateProgress(100, 'completed', 'PDF generated successfully!');
    console.log(`✅ PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
    return pdfBuffer;
  } catch (error) {
    progressTracker?.updateProgress(-1, 'error', error.message);
    console.error("❌ Error during PDF generation:", error);
    throw error;
  } finally {
    if (browser) {
      console.log("🔒 Closing browser...");
      try {
        await browser.close();
      } catch (e) {
        console.log("Error closing browser:", e.message);
      }
    }
  }
};
// --- API Routes ---
/**
 * POST /api/request-download
 *
 * Validates the requested URL, registers a new download session, replies
 * immediately with `{ sessionId }`, and then runs the PDF generation in the
 * background. The final outcome is stored in `downloadJobs` under that id.
 *
 * Request body: `{ url, email?, password? }`.
 * Responses: 200 `{ sessionId }`, or 400 `{ error }` when `url` is missing,
 * is not an absolute http(s) URL, or is not hosted on studocu.com.
 */
app.post('/api/request-download', (req, res) => {
  // `req.body` is undefined when no body parser handled the request.
  const { url, email, password } = req.body ?? {};

  // Parse the URL instead of substring-matching it: a plain
  // `includes('studocu.com')` also accepts e.g. `https://evil.test/?studocu.com`
  // and throws on non-string input.
  let parsedUrl = null;
  if (typeof url === 'string') {
    try {
      parsedUrl = new URL(url);
    } catch {
      // Not an absolute URL; rejected by the 400 response below.
    }
  }
  const hostname = parsedUrl?.hostname ?? '';
  const isHttp = parsedUrl?.protocol === 'http:' || parsedUrl?.protocol === 'https:';
  const isStudocuHost = hostname === 'studocu.com' || hostname.endsWith('.studocu.com');
  if (!isHttp || !isStudocuHost) {
    return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
  }

  // A bare `Date.now()` collides for requests arriving in the same
  // millisecond, making two clients share (and overwrite) one job entry.
  // Prefer a random UUID when the runtime exposes Web Crypto globally.
  const sessionId = globalThis.crypto?.randomUUID?.()
    ?? `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;

  const progressTracker = new ProgressTracker(sessionId);
  progressTrackers.set(sessionId, progressTracker);
  downloadJobs.set(sessionId, { status: 'processing' });
  console.log(`π― Processing request for: ${url} [Session: ${sessionId}]`);

  // Respond right away; the client polls the progress endpoint afterwards.
  res.json({ sessionId });

  // Run the PDF generation in the background and record how it ended.
  studocuDownloader(url, { email, password }, progressTracker)
    .then((pdfBuffer) => {
      downloadJobs.set(sessionId, { status: 'completed', buffer: pdfBuffer });
      progressTrackers.delete(sessionId); // the live tracker is no longer needed
    })
    .catch((error) => {
      // Rejections are not guaranteed to be Error instances.
      const message = error?.message ?? String(error);
      downloadJobs.set(sessionId, { status: 'error', message });
      progressTrackers.delete(sessionId); // the live tracker is no longer needed
    });
});
/**
 * GET /api/progress/:sessionId
 *
 * Reports the state of a download session. While a live tracker exists its
 * current values are returned; afterwards the stored job result decides the
 * answer. Unknown sessions yield 404.
 */
app.get('/api/progress/:sessionId', (req, res) => {
  const sessionId = req.params.sessionId;

  // A live tracker means the job is still running.
  const liveTracker = progressTrackers.get(sessionId);
  if (liveTracker) {
    const { progress, status, message } = liveTracker;
    return res.json({
      sessionId,
      progress,
      status,
      message,
      timestamp: new Date().toISOString(),
    });
  }

  // No tracker: fall back to the recorded final state, if any.
  const finishedJob = downloadJobs.get(sessionId);
  switch (finishedJob?.status) {
    case 'completed':
      return res.json({ sessionId, progress: 100, status: 'completed', message: 'PDF generated successfully!' });
    case 'error':
      return res.json({ sessionId, progress: -1, status: 'error', message: finishedJob.message });
    default:
      return res.status(404).json({ error: 'Session not found' });
  }
});
/**
 * GET /api/download/:sessionId
 *
 * Sends the generated PDF of a finished session as a file attachment.
 *
 * Responses:
 *   200 - the PDF bytes (`application/pdf`)
 *   400 - the job is still processing
 *   404 - no job is stored under this session id
 *   500 - the job failed, or is in an unexpected state
 */
app.get('/api/download/:sessionId', (req, res) => {
  const { sessionId } = req.params;
  const job = downloadJobs.get(sessionId);
  if (!job) {
    return res.status(404).json({ error: 'Download session not found or expired.' });
  }
  if (job.status === 'processing') {
    return res.status(400).json({ error: 'Download is still processing.' });
  }
  if (job.status === 'error') {
    return res.status(500).json({ error: `Failed to generate PDF: ${job.message}` });
  }
  if (job.status === 'completed' && job.buffer) {
    // Depending on the Puppeteer version, the stored PDF may be a plain
    // Uint8Array rather than a Buffer. Express 4's res.send() only treats real
    // Buffers as binary and would JSON-serialize anything else, so normalize.
    const pdf = Buffer.isBuffer(job.buffer) ? job.buffer : Buffer.from(job.buffer);
    res.setHeader('Content-Type', 'application/pdf');
    res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
    res.send(pdf);
    // NOTE: the job (including its buffer) stays in `downloadJobs` after the
    // download, so the file can be fetched again but the memory is not freed.
  } else {
    res.status(500).json({ error: 'An unknown error occurred.' });
  }
});
// --- Health and Info Endpoints ---
/**
 * GET /health
 *
 * Liveness report: a fixed status, the current time, the process uptime and
 * the number of sessions that still have a live progress tracker.
 */
app.get('/health', (_req, res) => {
  const report = {
    status: 'healthy',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    activeDownloads: progressTrackers.size,
  };
  res.json(report);
});
/**
 * GET /
 *
 * Static service descriptor: banner message, version, feature list and a
 * short overview of the available endpoints.
 */
app.get('/', (_req, res) => {
  const features = [
    'πͺ Advanced cookie banner bypass',
    'π Premium content unblurring',
    'π Login support for full access',
    'π Real-time progress tracking via polling',
    'π Clean PDF generation with print styles',
    'π΅οΈ Enhanced stealth to evade bot detection',
    'βοΈ Automatic Cloudflare challenge handling',
    'π§ Human-like behavior simulation',
  ];

  const endpoints = {
    request: 'POST /api/request-download (body: {url, filename?, email?, password?})',
    progress: 'GET /api/progress/:sessionId',
    download: 'GET /api/download/:sessionId',
    health: 'GET /health',
  };

  res.json({
    message: 'π Enhanced StuDocu Downloader API v5.3 - Real-time Progress Tracking with Cloudflare Bypass',
    version: '5.3.0',
    features,
    endpoints,
  });
});
// On SIGTERM or SIGINT: log which signal arrived, then exit with status 0.
for (const signalName of ['SIGTERM', 'SIGINT']) {
  process.on(signalName, () => {
    console.log(`${signalName} received, shutting down gracefully...`);
    process.exit(0);
  });
}
// Start the HTTP server on `port` and print a startup banner once it is listening.
app.listen(port, () => {
  console.log(`π Enhanced StuDocu Downloader v5.3.0 running on http://localhost:${port}`);
  console.log(`β¨ Features: Real-time progress tracking, enhanced stealth, Cloudflare bypass, and user feedback`);
});