User-agent: Mediapartners-Google*
Disallow:

User-agent: bingbot
Disallow:

User-agent: Slurp
Disallow:

User-agent: *
Disallow: /administrator/
Disallow: /cache/
Disallow: /component/
Disallow: /components/
Disallow: /images/
Disallow: /includes/
Disallow: /installation/
Disallow: /language/
Disallow: /libraries/
Disallow: /logs/
Disallow: /media/
Disallow: /modules/
Disallow: /plugins/
Disallow: /templates/
Disallow: /tmp/
# Temp listings to be removed by Google
Disallow: /cse/
Disallow: /fre/
Disallow: /jfusion
Disallow: /places-2
Disallow: /places-3
Disallow: /places3
Disallow: /places-top/
Disallow: /vacations/
# Disallow: /desserts?start
# Disallow: /en/
Disallow: /cgi-bin/
Disallow: /country/
Disallow: /search/
Disallow: /subs/
Disallow: /advanced-search/
Disallow: /social-networking/
# These do not exist (moved or now part of sub-domain)
Disallow: /A
Disallow: /B
Disallow: /C
Disallow: /D
Disallow: /E
Disallow: /F
Disallow: /G
Disallow: /H
Disallow: /I
Disallow: /J
Disallow: /K
Disallow: /L
Disallow: /M
Disallow: /N
Disallow: /O
Disallow: /P
Disallow: /Q
Disallow: /R
Disallow: /S
Disallow: /T
Disallow: /U
Disallow: /V
Disallow: /W
Disallow: /X
Disallow: /Y
Disallow: /Z
# These do not exist
Disallow: /0
Disallow: /1
Disallow: /2
Disallow: /3
Disallow: /4
Disallow: /5
Disallow: /6
Disallow: /7
Disallow: /8
Disallow: /9
# Google
Disallow: /community-builder/
Disallow: /content/section/
Disallow: /spain/
# ------[ Save for Future Usage ]---------
# Google-acceptable wildcards * and $
# Use for multi-listing spelling or category changes (to block the old URLs)
# Disallow: /restaurants/*-applebees$
# Disallow: /fast-foods/*-arbys$
# -----------------------------------------
Disallow: /latest-reviews/menu/*
Disallow: /*&order=date
Disallow: /*order:discussed/
Disallow: /*order:helpful/
Disallow: /*order:rating/
Disallow: /*order:rating/device:xhtml/
Disallow: /*order:rrating/

# 80legs.com/webcrawler.html
User-agent: 008
Disallow: /

User-agent: Aboundexbot
Disallow: /

# Choopa.net
User-agent: AhrefsBot
Disallow: /

# Does not respect robots.txt
# User-agent: Amazonaws
# Disallow: /

# Spiders a lot but does not include pages in its index (wastes bandwidth)
User-agent: Baiduspider
Crawl-delay: 20

# Is not a functioning search site
User-agent: discobot
Disallow: /

# Does not respect robots.txt
User-agent: Exabot
Disallow: /

# https://www.facebook.com/externalhit_uatext.php
User-agent: facebookexternalhit
Crawl-delay: 5

User-agent: GomezAgent
Disallow: /

# No benefit
User-agent: netseer
Disallow: /

User-agent: MJ12bot
Disallow: /

# stanford.edu - any purpose?
User-agent: WebVac
Disallow: /

# To help Google remove non-existent pages
User-agent: *
Disallow: /?subid=
Disallow: /">Travel.Com</a><o:p></o:p></span></p>
Disallow: /9bposv
Disallow: /article/
Disallow: /aviation/airline/al.htm
Disallow: /aviation/avap.html
Disallow: /charleroi/charleroi-youngcity.htm
Disallow: /consulat/shanghai.htm
Disallow: /index.com/
Disallow: /kesenian-suku-dayak
Disallow: /newspapers?id=SP4xAAAAIBAJ
Disallow: /sports/disabled/
Disallow: /spring/welcome.htm
Disallow: /2014/
Disallow: /2013/
Disallow: /2012/
Disallow: /2011/
Disallow: /2010/
Disallow: /2009/
Disallow: /2008/
Disallow: /2007/
Disallow: /2006/
Disallow: /2005/
Disallow: /2004/
Disallow: /2003/
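
# ------[ Pattern-matching notes (sketch) ]---------
# Hedged reference, not active rules. In Google's robots.txt handling,
# "*" matches any sequence of characters and "$" anchors a pattern to
# the end of the URL. The sample paths below are hypothetical, used
# only to illustrate the "Save for Future Usage" rules above:
# "Disallow: /restaurants/*-applebees$" would match
# /restaurants/ny-applebees (the URL ends at "-applebees") but not
# /restaurants/ny-applebees/menu (the URL continues past the "$" anchor).
# ---------------------------------------------------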