################################################################################
# CodeBitch's Analog configuration file: last modified: 9:15pm 2003-03-10 AEST
################################################################################
#
# Look at the Readme.html file or go to
# http://www.statslab.cam.ac.uk/~sret1/analog/
# or the latest Macintosh version page
# http://summary.net/soft/analog.html for more information.
#
# Layout note: one directive per line; '#' starts a comment that runs to the
# end of the line.
#
# Major service providers that have unique names for each user connection
# This list is by no means complete and some people will want to turn it off
#
HOSTALIAS *.proxy.aol.com proxy.aol.com
HOSTALIAS *.ix.netcom.com ix.netcom.com
HOSTALIAS *.netline.net netline.net
HOSTALIAS *.dynip.alter.net dynip.alter.net
HOSTALIAS *.ms.uu.net ms.uu.net
HOSTALIAS *.compuserve.com compuserve.com
HOSTALIAS *.pipeline.com pipeline.com
HOSTALIAS *.dial-access.att.net dial-access.att.net
HOSTALIAS *.mhv.net mhv.net
HOSTALIAS *-ext.prodigy.com ext.prodigy.com
HOSTALIAS *.austin.io.com austin.io.com
HOSTALIAS *.cis.upenn.edu cis.upenn.edu
HOSTALIAS *.erols.com erols.com
HOSTALIAS tide*.microsoft.com tide.microsoft.com
HOSTALIAS ip-pdx*.teleport.com ip-pdx.teleport.com
HOSTALIAS *.concentric.net concentric.net
HOSTALIAS *.earthlink.net earthlink.net
HOSTALIAS *.epix.net client.epix.net
HOSTALIAS *.midwest.net client.midwest.net
HOSTALIAS pburg*.planet.net pburg.planet.net
#
# Major search engines and service providers that have many names for
# their service
#
REFALIAS http://webcrawler.com/* http://www.webcrawler.com/*
REFALIAS http://*excite.com/search.gw http://www.excite.com/search.gw
REFALIAS http://*.lycos.com/cgi-bin/pursuit http://www.lycos.com/cgi-bin/pursuit
REFALIAS http://guide-p.infoseek.com*Titles http://guide-p.infoseek.com/Titles/
REFALIAS http://guide-p.infoseek.com*DB http://guide-p.infoseek.com/DB/
REFALIAS http://www?.infoseek.com*Titles http://guide-p.infoseek.com/Titles/
REFALIAS http://altavista.digital.com/* http://www.altavista.digital.com/*
REFALIAS http://search.yahoo.com/* http://search.yahoo.com/
REFALIAS http://www.yahoo.com/text/* http://www.yahoo.com/*
REFALIAS http://msn.yahoo.com/msn/* http://www.yahoo.com/*
REFALIAS http://gnn.yahoo.com/gnn/* http://www.yahoo.com/*
REFALIAS http://members.aol.com/* http://users.aol.com/*
REFALIAS http://www?.metacrawler.com/cgi-bin/nph-query http://www.metacrawler.com/cgi-bin/nph-query
#
# Browser name translations in the summary report
#
# NB can't have *crawl* in there as this would exclude MSIE web archiving from IE
# Uncomment this line if using Analog below version 5.1
# BROWOUTPUTALIAS Mozilla Netscape
BROWOUTPUTALIAS "Mozilla (compatible)" "Netscape (compatible)"
BROWOUTPUTALIAS IWENG AOL
# NB: Analog 4.1 and above finds all Opera browsers fine (even when semi-spoofed)
BROWALIAS REGEXP:.*(AvantGo.(.*);).* "AvantGo/$2 Handheld"
BROWALIAS REGEXP:.*(AvantGo.(.*)\)).* "AvantGo/$2 Handheld"
BROWALIAS "Mozilla/3.0 (compatible)" "AvantGo Sync"
# Analog 5.1 and above finds Konqueror itself
#BROWALIAS REGEXP:.*(Konqueror.([^\s;\)]*);.*)X11\) "Konqueror/$2 Unix"
#BROWALIAS REGEXP:.*(Konqueror.([^\s;\)]*);.)([^\s;\)]*)\) "Konqueror/$2 $3"
BROWALIAS "Mozilla/5.0 (compatible; AvantGo 3.2; ProxiNet; Danger hiptop 1.0)" "Danger hiptop 1.0"
# NOTE(review): [^s^;] excludes the literal characters 's', '^' and ';' --
# possibly intended as [^\s;]; confirm before changing.
BROWALIAS REGEXP:.+iCab.([^s^;]*);.(.*) "iCab/$1 ($2"
BROWALIAS REGEXP:.+iCab.([0-9\.]*)(.*) "iCab/$1 ($2"
# no longer required from version 5.32
#BROWALIAS REGEXP:.*Safari/([0-9]*) "Safari/$1 Mac_PowerPC"
#BROWALIAS REGEXP:.*Camino/([0-9\.]*) "Camino/$1 Mac_PowerPC"
BROWALIAS REGEXP:.*(NetPositive.(.*);).* "NetPositive/$2 BeOS"
BROWALIAS REGEXP:.*(NetPositive.(.*)\)).* "NetPositive/$2 BeOS"
BROWALIAS REGEXP:.*(OmniWeb.([^\s;\)]*))\)(.*)\) "OmniWeb/$2 $3 Mac_PowerPC"
BROWALIAS REGEXP:.*(OmniWeb.([^\s;\)]*));(.*)\) "OmniWeb/$2 $3 Mac_PowerPC"
BROWALIAS REGEXP:.*(OmniWeb.([^;\s\)]*))(.*) "OmniWeb/$2 $3 Mac_PowerPC"
BROWALIAS REGEXP:.*OmniWeb;.* "OmniWeb/4 Mac_PowerPC"
BROWALIAS *WannaBe* "WannaBe/1 Mac_PowerPC"
BROWALIAS REGEXP:.*(Planetweb.([^\s;\)]*);).* "Dreamcast_PlanetWeb/$2 SegaSaturn"
BROWALIAS REGEXP:.*(Planetweb.(.*);).* "Dreamcast_PlanetWeb/$2 SegaSaturn"
BROWALIAS REGEXP:.*(MSPIE.(.*);).* "IE_for_WinCE/$2 Windows CE"
BROWALIAS REGEXP:.*(Netbox.(.*);).* "Netbox/$2 Kiosk"
BROWALIAS REGEXP:.*(NaviTel.(.*);).* "NaviTel/$2 Kiosk"
BROWALIAS REGEXP:.*RISC.OS.(.*);.Oregano.(.*) "Oregano/$2 RISC OS $1"
BROWALIAS REGEXP:.*EudoraWeb.([^\s;\)]*).*PalmOS.([^\s;\)]*).* "EudoraWeb/$1 PalmOS $2"
BROWALIAS REGEXP:.*I-Opener.([^\s;\)]*).* "I-Opener/$1 NetPliance"
BROWALIAS *portalmmm* "portalmmm/1 i-Mode_Phone"
BROWALIAS *BookmarkTool* "Offline MacOSX"
BROWALIAS *OfflineExplorer* Offline
BROWALIAS *WebStripper* Offline
BROWALIAS *NetAnts* Offline
BROWALIAS *WebCascader* Offline
BROWALIAS *GetRight* Offline
BROWALIAS *GetLeft* Offline
BROWALIAS *WinHttpRequest* Offline
# $1 is the text between "(" and the first ";" (the Links version),
# $2 is the remainder up to the closing ")".
BROWALIAS REGEXP:Links.\(([^\s;]*);.(.*)\) "Links/$1 ($2)"
#####################
# Text-based browsers
# Include: Lynx, Links, WannaBe, Dillo, W3M,
#
# Offline browsers
# Include WebStripped, Bookmark managers, gnome-vfs
# Cleaning up annoying Netscape 4 tag bumf
#
BROWALIAS REGEXP:Mozilla/([^C]*)C-CCK-MCD(.*) "Mozilla/$1 $2"
BROWALIAS REGEXP:Mozilla/([^C]*)C-UDP(.*) "Mozilla/$1 $2"
BROWALIAS REGEXP:Mozilla/([^C]*)C-SGI(.*) "Mozilla/$1 $2"
BROWALIAS REGEXP:Mozilla/([^C]*)C-NSCP(.*) "Mozilla/$1 $2"
BROWALIAS REGEXP:Mozilla/([^C]*)C-({.*) "Mozilla/$1 $2"
BROWALIAS REGEXP:Mozilla/([^C]*)C-([0-9].*) "Mozilla/$1 $2"
BROWALIAS REGEXP:Mozilla/([^C]*)C-([A-Z].*) "Mozilla/$1 $2"
BROWALIAS REGEXP:Mozilla/([\.0-9]*)({.*) "Mozilla/$1 $2"
BROWALIAS "Mozilla" "Mozilla/5.0"
# Catch Netscape 7
# Uncomment if using Analog version before 5.24
# BROWALIAS REGEXP:(.*)Netscape/7.0b1 "Netscape/7.0b1 $1 "
#BROWALIAS REGEXP:Microsoft.URL.Control.-.(.*) "Microsoft URL Control/$1"
BROWALIAS *WebWasher* "WebWasher/1 Privacy"
# WebWasher is a browser privacy add-on
# Although these are human users, their user agent information is not reliable
# see http://www.pacificnet.net/~bbruce/user_agent.htm
BROWALIAS *SpaceBison* "Proximotron/1 Privacy"
BROWALIAS *ShonenKnife* "Proximotron/1 Privacy"
# Proximotron does similarly http://www.flaaten.dk/prox/features.html
BROWALIAS *SuperCleaner* "SuperCleaner/1 Privacy"
# See http://www.southbaypc.com/SuperCleaner/
BROWALIAS *teradex* Privacy
# See http://www.sendfakemail.com/fakemail/antispam.html
BROWALIAS "Mozilla/3.Mozilla/2.01 (Win95; I)" Spammer
BROWALIAS *Email* Spammer
# see http://support.free-conversant.com/2701
BROWALIAS *CherryPicker* Spammer
BROWALIAS *ExtractorPro* Spammer
BROWALIAS "Crescent Internet ToolPak HTTP OLE Control v.1.0" Spammer
BROWALIAS "Mozilla/2.0 (compatible; NEWT ActiveX; Win32)" Spammer
#BROWALIAS "Mozilla/3.01-C-MACOS8 (Macintosh; I; PPC)" SearchEngineSpider
BROWALIAS REGEXPI:.*bandit.* Spammer
# see http://www.webmasterworld.com/forum11/1032.htm
BROWALIAS *Surf15a* Spammer
BROWALIAS *DBrowse* Spammer
BROWALIAS *PBrowse* Spammer
BROWALIAS *PEval* Spammer
BROWALIAS *Vayala* Spammer
BROWALIAS *MailVampire* Spammer
# see http://www.psychedelix.com/agents.html
BROWALIAS REGEXPI:MFC.Foundation.Class.Library.* Spammer
# incomplete Netscape strings banging out HEAD requests
BROWALIAS "Mozilla/4.7" Spammer
# Random User Agent strings
BROWALIAS REGEXP:[A-Z]{6,} Spammer
# see http://www.fleiner.com/bots/trapped.html
BROWALIAS REGEXP:.*Microsoft.URL.Control.* Spammer
BROWALIAS "Mail Sweeper" Spammer
# Spiders and robots
# Note, many of these are redundantly caught by full name and "bot"
#
# This first one is IBM crawler http://www.almaden.ibm.com/cs/crawler
BROWALIAS REGEXP:.*Fast.Crawler.* SearchEngineSpider
BROWALIAS *almaden* SearchEngineSpider
BROWALIAS *ah-ha.com* SearchEngineSpider
BROWALIAS *ASPSeek* SearchEngineSpider
BROWALIAS *ASPseek* SearchEngineSpider
BROWALIAS *AlkalineBOT* SearchEngineSpider
BROWALIAS *Atomz* SearchEngineSpider
BROWALIAS *Bot* SearchEngineSpider
BROWALIAS *BOT* SearchEngineSpider
BROWALIAS *Brother* SearchEngineSpider
BROWALIAS *BunnySlippers* SearchEngineSpider
BROWALIAS *Capture* SearchEngineSpider
BROWALIAS *Claymont* SearchEngineSpider
BROWALIAS *Crawler* SearchEngineSpider
BROWALIAS *crawleradmin@icc.melco.co.jp* SearchEngineSpider
BROWALIAS *newsearchengine* SearchEngineSpider
BROWALIAS *cosmos* SearchEngineSpider
BROWALIAS *CyberSpyder* SearchEngineSpider
BROWALIAS *DaviesBot* SearchEngineSpider
BROWALIAS *DeepIndex* SearchEngineSpider
BROWALIAS *DiaGem* SearchEngineSpider
BROWALIAS REGEXPI:.*Fetch.API.Request.* SearchEngineSpider
BROWALIAS *FAST-WebCrawler* SearchEngineSpider
BROWALIAS *Eming* SearchEngineSpider
BROWALIAS *GNODSPIDER* SearchEngineSpider
BROWALIAS *Getweb* Getweb
BROWALIAS *grub-client* SearchEngineSpider
BROWALIAS *Googlebot* SearchEngineSpider
BROWALIAS *Gulliver* SearchEngineSpider
BROWALIAS *Hatena* SearchEngineSpider
BROWALIAS *HTTrack* SearchEngineSpider
BROWALIAS *httpGlooton* SearchEngineSpider
BROWALIAS *Harvest* SearchEngineSpider
BROWALIAS *InfoSeek* SearchEngineSpider
BROWALIAS *Infoseek* SearchEngineSpider
BROWALIAS *IP*Works!* SearchEngineSpider
BROWALIAS *Inktomi* SearchEngineSpider
BROWALIAS *InternetLinkAgent* SearchEngineSpider
BROWALIAS REGEXPI:Internet.Ninja.* SearchEngineSpider
BROWALIAS *InternetSeer* SearchEngineSpider
BROWALIAS *IPiumBot* SearchEngineSpider
BROWALIAS *Jack* SearchEngineSpider
BROWALIAS "Java1.1.8" SearchEngineSpider
BROWALIAS *Jeeves* SearchEngineSpider
BROWALIAS *JennyBot* SearchEngineSpider
BROWALIAS *Jigsaw* SearchEngineSpider
# see http://www.slac.stanford.edu/xorg/nmtf/nmtf-tools.html
BROWALIAS *Lachesis* SearchEngineSpider
BROWALIAS *LinkWalker* SearchEngineSpider
BROWALIAS REGEXP:.*Master.com.Watchdog.* SearchEngineSpider
BROWALIAS *MacPiCkS* SearchEngineSpider
BROWALIAS *Merc* SearchEngineSpider
BROWALIAS *Mercator* SearchEngineSpider
BROWALIAS *crawler@metacarta.com* SearchEngineSpider
BROWALIAS *MitakeWebIndexer* SearchEngineSpider
BROWALIAS *MuscatFerret* SearchEngineSpider
# This should only pick up fake Moz's without any other code
BROWALIAS "Mozilla/4.0" SearchEngineSpider
BROWALIAS *NetMind* SearchEngineSpider
BROWALIAS *Netcraft* NetcraftSpider
BROWALIAS *NutchCrawler* SearchEngineSpider
BROWALIAS *OpenTextSiteCrawler* SearchEngineSpider
BROWALIAS *PlantyNet* SearchEngineSpider
BROWALIAS REGEXP:.*PerMan.Surfer.([\.0-9]*)(.*) SearchEngineSpider
BROWALIAS *PNWalker* SearchEngineSpider
BROWALIAS *Powermarks* Powermarks
BROWALIAS *Pompos* SearchEngineSpider
BROWALIAS *Rainbot* SearchEngineSpider
BROWALIAS *RepoMonkey* SearchEngineSpider
BROWALIAS *Robot* SearchEngineSpider
BROWALIAS *Rumours-Agent* SearchEngineSpider
BROWALIAS *Scooter* SearchEngineSpider
BROWALIAS *ScoutAbout* SearchEngineSpider
BROWALIAS *SearchTone* SearchEngineSpider
BROWALIAS *Searcher* SearchEngineSpider
BROWALIAS *Shark* SearchEngineSpider
BROWALIAS *siterank* SearchEngineSpider
BROWALIAS *SlySearch* SearchEngineSpider
BROWALIAS *Spider* SearchEngineSpider
BROWALIAS *Steeler* SearchEngineSpider
BROWALIAS *suzuran* SearchEngineSpider
BROWALIAS *crawleradmin@icc.melco.co.jp* SearchEngineSpider
BROWALIAS *samualt9@bigfoot.com* SearchEngineSpider
BROWALIAS *streamguide* SearchEngineSpider
BROWALIAS *Teleport* SearchEngineSpider
BROWALIAS *Toutatis* SearchEngineSpider
BROWALIAS *WFARC* SearchEngineSpider
BROWALIAS *WebCapture* SearchEngineSpider
BROWALIAS *Webclipping.com* SearchEngineSpider
BROWALIAS *WebCrawler* SearchEngineSpider
BROWALIAS *WebCopier* SearchEngineSpider
BROWALIAS *WebFetch* SearchEngineSpider
BROWALIAS *webfetch* SearchEngineSpider
BROWALIAS *WebcamSearch* SearchEngineSpider
BROWALIAS *WebMiner* SearchEngineSpider
BROWALIAS *www.asona.org* SearchEngineSpider
BROWALIAS *Wget* SearchEngineSpider
BROWALIAS *Xenu* SearchEngineSpider
BROWALIAS *ZyBorg* SearchEngineSpider
BROWALIAS *Zyborg* SearchEngineSpider
BROWALIAS *appie* SearchEngineSpider
BROWALIAS *asterias* SearchEngineSpider
BROWALIAS *bigbrain* SearchEngineSpider
BROWALIAS *bot* SearchEngineSpider
BROWALIAS *bumblebee* SearchEngineSpider
BROWALIAS *crawler@utopy.com* SearchEngineSpider
BROWALIAS *htdig* SearchEngineSpider
BROWALIAS *hhjhj@yahoo.com* SearchEngineSpider
BROWALIAS *gazz* SearchEngineSpider
BROWALIAS *iQuest* SearchEngineSpider
BROWALIAS *ia_archiver* SearchEngineSpider
BROWALIAS *infoseek* SearchEngineSpider
BROWALIAS REGEXPI:knowledge/([\.0-9]*)(.*) SearchEngineSpider
BROWALIAS *kobot* SearchEngineSpider
BROWALIAS *larbin* SearchEngineSpider
BROWALIAS *linkcheck* SearchEngineSpider
BROWALIAS *marvin* SearchEngineSpider
BROWALIAS *MegaSheep* SearchEngineSpider
BROWALIAS "Mozi!" SearchEngineSpider
BROWALIAS *moget* SearchEngineSpider
BROWALIAS *nkeeper@nkeeper.com* SearchEngineSpider
BROWALIAS *picsearch* SearchEngineSpider
BROWALIAS *polybot* SearchEngineSpider
BROWALIAS *RedKernel* SearchEngineSpider
BROWALIAS *robot* SearchEngineSpider
BROWALIAS *TrueRobot* SearchEngineSpider
BROWALIAS *scooter* SearchEngineSpider
BROWALIAS *sitecheck* SearchEngineSpider
BROWALIAS *slurp@inktomi.com* SearchEngineSpider
BROWALIAS *spider* SearchEngineSpider
# see http://www.abc.org.uk/exclusions/exclude.htm
BROWALIAS *Spinne* SearchEngineSpider
BROWALIAS *targetblaster* SearchEngineSpider
BROWALIAS *tivraspider* SearchEngineSpider
BROWALIAS *URLBlaze* SearchEngineSpider
BROWALIAS *UIowaCrawler* SearchEngineSpider
BROWALIAS *vias.ncsa.uiuc.edu* SearchEngineSpider
BROWALIAS *Vonna* SearchEngineSpider
BROWALIAS *webcollage* SearchEngineSpider
BROWALIAS *webreaper* SearchEngineSpider
BROWALIAS *Webshuttle* SearchEngineSpider
BROWALIAS *WhizBang* SearchEngineSpider
BROWALIAS *Whizbang* SearchEngineSpider
BROWALIAS *xyleme* SearchEngineSpider
BROWALIAS *Zippyfinder* SearchEngineSpider
BROWALIAS *Zeus* SearchEngineSpider
BROWALIAS *grub* SearchEngineSpider
BROWALIAS REGEXP:.*Indy.Library.* SearchEngineSpider
BROWALIAS *research@yahoo.com* SearchEngineSpider
BROWALIAS *lwp-trivial* SearchEngineSpider
# Additional from http://www.abc.org.uk/exclusions/exclude_robots01072002.txt
BROWALIAS *247sitewatch* SearchEngineSpider
BROWALIAS *abachobot* SearchEngineSpider
BROWALIAS *abciauditors* SearchEngineSpider
BROWALIAS *acsportalspide* SearchEngineSpider
BROWALIAS *adnettrack* SearchEngineSpider
BROWALIAS *aladdino* SearchEngineSpider
BROWALIAS *augurfind* SearchEngineSpider
BROWALIAS *indexer* SearchEngineSpider
BROWALIAS *apachebench* SearchEngineSpider
BROWALIAS *arachnia* SearchEngineSpider
BROWALIAS *aranha* SearchEngineSpider
BROWALIAS *autonomy* SearchEngineSpider
BROWALIAS *avsearch* SearchEngineSpider
BROWALIAS *baidu* SearchEngineSpider
BROWALIAS *bigbrother* SearchEngineSpider
BROWALIAS *bumblebee* SearchEngineSpider
BROWALIAS *javacrawler* SearchEngineSpider
BROWALIAS *cglconnection* SearchEngineSpider
BROWALIAS *checktime* SearchEngineSpider
BROWALIAS *checkurl* SearchEngineSpider
BROWALIAS *pma-connect* SearchEngineSpider
BROWALIAS *cocrawler* SearchEngineSpider
BROWALIAS *combine* SearchEngineSpider
BROWALIAS *converainternet* SearchEngineSpider
BROWALIAS *cosmos* SearchEngineSpider
BROWALIAS *craftbot* SearchEngineSpider
BROWALIAS *crawlerboy* SearchEngineSpider
BROWALIAS *crescent* SearchEngineSpider
BROWALIAS REGEXPI:.*crossword.puzzles.* SearchEngineSpider
BROWALIAS *cyberspyder* SearchEngineSpider
BROWALIAS *cybot* SearchEngineSpider
BROWALIAS *daypopbot* SearchEngineSpider
BROWALIAS *dialer* SearchEngineSpider
BROWALIAS REGEXPI:.*direct.hit.grabber.* SearchEngineSpider
BROWALIAS REGEXPI:.*disco.watchman.* SearchEngineSpider
BROWALIAS *docomo* SearchEngineSpider
BROWALIAS *ezdeal* SearchEngineSpider
BROWALIAS *EbiNess* SearchEngineSpider
BROWALIAS *favorg* SearchEngineSpider
BROWALIAS *filehound* SearchEngineSpider
BROWALIAS *firehunter* SearchEngineSpider
BROWALIAS *flashget* SearchEngineSpider
BROWALIAS *flipdog* SearchEngineSpider
BROWALIAS *flunky* SearchEngineSpider
BROWALIAS *freefind* SearchEngineSpider
BROWALIAS *frontier* SearchEngineSpider
BROWALIAS *profiler* SearchEngineSpider
BROWALIAS *fusionbot* SearchEngineSpider
BROWALIAS *gbloogle* SearchEngineSpider
BROWALIAS *gcreep* SearchEngineSpider
BROWALIAS *gigabaz* SearchEngineSpider
BROWALIAS *gmon* SearchEngineSpider
BROWALIAS *goblin* SearchEngineSpider
BROWALIAS *golem* SearchEngineSpider
BROWALIAS *golffindit.com* SearchEngineSpider
BROWALIAS *gomezagent* SearchEngineSpider
BROWALIAS *grabber* SearchEngineSpider
BROWALIAS *graphicbrain.com* SearchEngineSpider
BROWALIAS *gulliver* SearchEngineSpider
BROWALIAS *hammoun* SearchEngineSpider
BROWALIAS REGEXPI:.*hp.openview.network.node.manager.* SearchEngineSpider
BROWALIAS *hubater* SearchEngineSpider
BROWALIAS *hyperbee* SearchEngineSpider
BROWALIAS *ideare* SearchEngineSpider
BROWALIAS *iltrovatore-setaccio* SearchEngineSpider
BROWALIAS *indexpert* SearchEngineSpider
BROWALIAS *internetami* SearchEngineSpider
BROWALIAS *internetseer* SearchEngineSpider
BROWALIAS REGEXPI:.*inverse.ip.insight.* SearchEngineSpider
BROWALIAS *ipswitch_whatsup* SearchEngineSpider
BROWALIAS *iremember* SearchEngineSpider
BROWALIAS *iria* SearchEngineSpider
BROWALIAS *iron33* SearchEngineSpider
BROWALIAS *isilo* SearchEngineSpider
BROWALIAS REGEXPI:.*jca-net.search.retriever.* SearchEngineSpider
BROWALIAS *keynote* SearchEngineSpider
BROWALIAS *lachesis* SearchEngineSpider
BROWALIAS *legs* SearchEngineSpider
BROWALIAS *lexibot* SearchEngineSpider
BROWALIAS *linbot* SearchEngineSpider
BROWALIAS *linkalarm* SearchEngineSpider
BROWALIAS *linkbot* SearchEngineSpider
BROWALIAS *linkchecker* SearchEngineSpider
BROWALIAS *linkguard* SearchEngineSpider
BROWALIAS *linklint* SearchEngineSpider
BROWALIAS *linkscan* SearchEngineSpider
BROWALIAS *linksweeper* SearchEngineSpider
BROWALIAS *linkwalker* SearchEngineSpider
BROWALIAS *lnspiderguy* SearchEngineSpider
BROWALIAS *lotusdiscovery* SearchEngineSpider
BROWALIAS *lwp-trivial* SearchEngineSpider
BROWALIAS REGEXPI:.*lycos.-.birdlite.* SearchEngineSpider
BROWALIAS *mag-net* SearchEngineSpider
BROWALIAS *marketwave* SearchEngineSpider
BROWALIAS *mazingo* SearchEngineSpider
BROWALIAS *mediacrawler* SearchEngineSpider
BROWALIAS *mercator* SearchEngineSpider
BROWALIAS *metabuscado* SearchEngineSpider
BROWALIAS *metacrawler* SearchEngineSpider
BROWALIAS *metagopher* SearchEngineSpider
BROWALIAS *mfc_tear_sample* SearchEngineSpider
# NOTE(review): Microsoft URL Control is also aliased to Spammer earlier in
# this file; confirm which classification is intended.
BROWALIAS REGEXPI:.*microsoft.url.control.* "SearchEngineSpider"
BROWALIAS REGEXP:.*Microsoft.URL.Control.* "SearchEngineSpider"
BROWALIAS REGEXPI:microsoft.url.control.* "SearchEngineSpider"
BROWALIAS REGEXP:Microsoft.URL.Control.* "SearchEngineSpider"
BROWALIAS *microsoft_site_analyst* SearchEngineSpider
BROWALIAS *mindcrawle* SearchEngineSpider
BROWALIAS *mobipocket* SearchEngineSpider
BROWALIAS *mogimogi* SearchEngineSpider
BROWALIAS *mondosearch* SearchEngineSpider
BROWALIAS *monitor* SearchEngineSpider
BROWALIAS *mothra/126-paladium* SearchEngineSpider
BROWALIAS "mozilla 2.0 (compatible; msie 3.02; update a; windows nt)" SearchEngineSpider
BROWALIAS REGEXPI:.*msdn.surfbear.* SearchEngineSpider
BROWALIAS *multitext* SearchEngineSpider
BROWALIAS *muscat* SearchEngineSpider
BROWALIAS *nabot* SearchEngineSpider
BROWALIAS *namowebeditor* SearchEngineSpider
BROWALIAS *nbot* SearchEngineSpider
BROWALIAS *netants* SearchEngineSpider
BROWALIAS *netcarta_webmapper* SearchEngineSpider
BROWALIAS *netprospector* SearchEngineSpider
BROWALIAS *netrecorder* SearchEngineSpider
BROWALIAS *netropa* SearchEngineSpider
BROWALIAS *nettracker* SearchEngineSpider
BROWALIAS *newsanchor* SearchEngineSpider
BROWALIAS *newscan-online* SearchEngineSpider
BROWALIAS *newsguard* SearchEngineSpider
BROWALIAS *newstoyou.com* SearchEngineSpider
BROWALIAS *nobody* SearchEngineSpider
BROWALIAS *novell* SearchEngineSpider
BROWALIAS *nutch* SearchEngineSpider
BROWALIAS *Nutch* SearchEngineSpider
BROWALIAS *obot* SearchEngineSpider
BROWALIAS *opentextsitecrawler* SearchEngineSpider
BROWALIAS *overture* SearchEngineSpider
BROWALIAS *oxxbot1* SearchEngineSpider
BROWALIAS *pagesentry* SearchEngineSpider
BROWALIAS *pavuk* SearchEngineSpider
BROWALIAS *picosearch* SearchEngineSpider
BROWALIAS *plumtree* SearchEngineSpider
BROWALIAS *pompos* SearchEngineSpider
BROWALIAS *Poodle* SearchEngineSpider
BROWALIAS *pornhunt.net* SearchEngineSpider
BROWALIAS *powerup* SearchEngineSpider
BROWALIAS *proxinet* SearchEngineSpider
BROWALIAS *psbot* SearchEngineSpider
BROWALIAS *Python-urllib* SearchEngineSpider
BROWALIAS *rabot* SearchEngineSpider
BROWALIAS *rational* SearchEngineSpider
BROWALIAS *realnamesbot* SearchEngineSpider
BROWALIAS *redalert* SearchEngineSpider
BROWALIAS *remotesearchmtspider* SearchEngineSpider
BROWALIAS *rico* SearchEngineSpider
BROWALIAS *robopost* SearchEngineSpider
BROWALIAS *savvysearch* SearchEngineSpider
BROWALIAS *sawaalrobo* SearchEngineSpider
BROWALIAS *schmozilla* SearchEngineSpider
BROWALIAS *search.ch* SearchEngineSpider
BROWALIAS *searchonly* SearchEngineSpider
BROWALIAS *searchtone* SearchEngineSpider
BROWALIAS *seekandexplore* SearchEngineSpider
BROWALIAS *semiotagger* SearchEngineSpider
BROWALIAS *shadow* SearchEngineSpider
BROWALIAS *shopper* SearchEngineSpider
BROWALIAS *sitemap* SearchEngineSpider
BROWALIAS *sitetech-rover* SearchEngineSpider
BROWALIAS *snooper* SearchEngineSpider
BROWALIAS *softwing_tear_agent* SearchEngineSpider
BROWALIAS REGEXPI:.*sonera.plaza.haku.* SearchEngineSpider
BROWALIAS *spry_mosaic* SearchEngineSpider
BROWALIAS *stackrambler* SearchEngineSpider
BROWALIAS *stuff* SearchEngineSpider
BROWALIAS *suke* SearchEngineSpider
BROWALIAS *szukacz* SearchEngineSpider
BROWALIAS *testur1* SearchEngineSpider
BROWALIAS *intraformant* SearchEngineSpider
BROWALIAS *tocrawl/urldispatcher* SearchEngineSpider
BROWALIAS *toutatis* SearchEngineSpider
BROWALIAS *true_robot* SearchEngineSpider
BROWALIAS *udmsearch* SearchEngineSpider
BROWALIAS *ultraseek* SearchEngineSpider
BROWALIAS *urlcheck* SearchEngineSpider
BROWALIAS *urlspider* SearchEngineSpider
BROWALIAS *vagabondo* SearchEngineSpider
BROWALIAS *verity* SearchEngineSpider
BROWALIAS *voyager* SearchEngineSpider
BROWALIAS *webbe* SearchEngineSpider
BROWALIAS *webbug* SearchEngineSpider
BROWALIAS *webcapture* SearchEngineSpider
BROWALIAS *webcheck* SearchEngineSpider
BROWALIAS *webclipping.com* SearchEngineSpider
BROWALIAS *webcraft* SearchEngineSpider
BROWALIAS *webcrawl* SearchEngineSpider
BROWALIAS *webdup* SearchEngineSpider
BROWALIAS *webenhancer* SearchEngineSpider
BROWALIAS *webextractor* SearchEngineSpider
BROWALIAS *webferret* SearchEngineSpider
BROWALIAS *webinator* SearchEngineSpider
BROWALIAS *webindex* SearchEngineSpider
BROWALIAS *webseekrobot* SearchEngineSpider
BROWALIAS *extractor* SearchEngineSpider
BROWALIAS *quester* SearchEngineSpider
BROWALIAS *webtrends* SearchEngineSpider
BROWALIAS *webzinc* SearchEngineSpider
BROWALIAS *wfarc* SearchEngineSpider
BROWALIAS *whatsup* SearchEngineSpider
BROWALIAS *whatuseek_winona* SearchEngineSpider
BROWALIAS *wisecrawler* SearchEngineSpider
BROWALIAS *worm* SearchEngineSpider
BROWALIAS *wwwc* SearchEngineSpider
BROWALIAS *xcrawler* SearchEngineSpider
BROWALIAS *x-tractor* SearchEngineSpider
BROWALIAS *yandex* SearchEngineSpider
BROWALIAS *yarienavoir* SearchEngineSpider
BROWALIAS *ync* SearchEngineSpider
BROWALIAS *Szukacz* SearchEngineSpider
BROWALIAS "Mozilla/4.0 efp@gmx.net" SearchEngineSpider
BROWALIAS *BorderManager* SearchEngineSpider
BROWALIAS *girafa* SearchEngineSpider
BROWALIAS *NutchCrawler* SearchEngineSpider
BROWALIAS *Scrubby* SearchEngineSpider
BROWALIAS *Zao* SearchEngineSpider
BROWALIAS *Creep* SearchEngineSpider
BROWALIAS *WebGather* SearchEngineSpider
BROWALIAS *Exalead* SearchEngineSpider
BROWALIAS REGEXP:libwww-perl/[\.0-9]+.FP/[\.0-9]+ SearchEngineSpider
BROWALIAS "Mozilla/3.01 (compatible;)" SearchEngineSpider
# http://www.pgts.com.au/pgtsj/pgtsj0208d.html
# see also: http://www.mjbdata.co.uk/spider_insight.php
# the following are robots listed at http://www.robotstxt.org/
BROWALIAS *Anthill* SearchEngineSpider
BROWALIAS *xChaos_Arachne* SearchEngineSpider
#
#
# Validators
BROWALIAS *Validator* Validators
BROWALIAS *SiteCheck* Validators
BROWALIAS *WDG* Validators
BROWALIAS *W3CRobot* Validators
BROWALIAS *BrowserEmulator* Offline
# See http://www.webmasterworld.com/forum11/1124.htm
BROWALIAS *RPT-HTTPClient* Validators
BROWALIAS *W3C-checklink* Validators
BROWALIAS *W3Crawler* Validators
BROWALIAS *404-Error-Search* Validators
# see http://www.psychedelix.com/agents.html
BROWALIAS *libwww-perl* Validators
BROWALIAS *Astra* Validators
BROWALIAS *Katipo* LinkChecker
# Astra is a load tester published by Mercury Interactive
# See http://www-svca.mercuryinteractive.com/products/testing/
#
# File types
#
SUBTYPE *.gz,*.Z
TYPEOUTPUTALIAS .html ".html [Hypertext Markup Language]"
TYPEOUTPUTALIAS .htm ".htm [Hypertext Markup Language]"
TYPEOUTPUTALIAS .shtml ".shtml [Server-parsed HTML]"
TYPEOUTPUTALIAS .ps ".ps [PostScript]"
TYPEOUTPUTALIAS .gz ".gz [Gzip compressed files]"
TYPEOUTPUTALIAS .tar.gz ".tar.gz [Compressed archives]"
TYPEOUTPUTALIAS .jpg ".jpg [JPEG graphics]"
TYPEOUTPUTALIAS .jpeg ".jpeg [JPEG graphics]"
TYPEOUTPUTALIAS .gif ".gif [GIF graphics]"
TYPEOUTPUTALIAS .png ".png [PNG graphics]"
TYPEOUTPUTALIAS .txt ".txt [Plain text]"
TYPEOUTPUTALIAS .cgi ".cgi [CGI scripts]"
TYPEOUTPUTALIAS .pl ".pl [Perl scripts]"
TYPEOUTPUTALIAS .css ".css [Cascading Style Sheets]"
TYPEOUTPUTALIAS .class ".class [Java class files]"
TYPEOUTPUTALIAS .pdf ".pdf [Adobe Portable Document Format]"
TYPEOUTPUTALIAS .zip ".zip [Zip archives]"
TYPEOUTPUTALIAS .hqx ".hqx [Macintosh archives]"
TYPEOUTPUTALIAS .exe ".exe [Executables]"
TYPEOUTPUTALIAS .wav ".wav [WAV sound files]"
TYPEOUTPUTALIAS .avi ".avi [AVI movies]"
TYPEOUTPUTALIAS .arc ".arc [Compressed archives]"
TYPEOUTPUTALIAS .mid ".mid [MIDI sound files]"
TYPEOUTPUTALIAS .mp3 ".mp3 [MP3 sound files]"
TYPEOUTPUTALIAS .doc ".doc [Microsoft Word document]"
TYPEOUTPUTALIAS .rtf ".rtf [Rich Text Format]"
TYPEOUTPUTALIAS .mov ".mov [Quick Time movie]"
TYPEOUTPUTALIAS .mpg ".mpg [MPEG movie]"
TYPEOUTPUTALIAS .mpeg ".mpeg [MPEG movie]"
TYPEOUTPUTALIAS .asp ".asp [Active Server Pages]"
TYPEOUTPUTALIAS .jsp ".jsp [Java Server Pages]"
TYPEOUTPUTALIAS .cfm ".cfm [Cold Fusion]"
TYPEOUTPUTALIAS .php ".php [PHP]"
TYPEOUTPUTALIAS .js ".js [JavaScript code]"
TYPEOUTPUTALIAS .xbm ".xbm [X bitmap]"
#
# Search Engines
#
SEARCHENGINE http://*altavista.*/* q
SEARCHENGINE http://*yahoo.*/* p
SEARCHENGINE http://*google.*/* q
SEARCHENGINE http://*lycos.*/* query
SEARCHENGINE http://*aol.*/* query
SEARCHENGINE http://*excite.*/* search
SEARCHENGINE http://*go2net.*/* general
SEARCHENGINE http://*metacrawler.*/* general
SEARCHENGINE http://*msn.*/* MT
SEARCHENGINE http://*hotbot.com/* MT
SEARCHENGINE http://*netscape.*/* search
SEARCHENGINE http://*looksmart.*/* key
SEARCHENGINE http://*infoseek.*/* qt
SEARCHENGINE http://*webcrawler.*/* search,searchText
SEARCHENGINE http://*goto.*/* Keywords
SEARCHENGINE http://*snap.*/* keyword
SEARCHENGINE http://*dogpile.*/* q
SEARCHENGINE http://*askjeeves.*/* ask
SEARCHENGINE http://*ask.*/* ask
SEARCHENGINE http://*aj.*/* ask
SEARCHENGINE http://*directhit.*/* qry
SEARCHENGINE http://*alltheweb.*/* query
SEARCHENGINE http://*northernlight.*/* qr
SEARCHENGINE http://*nlsearch.*/* qr
SEARCHENGINE http://*dmoz.*/* search
SEARCHENGINE http://*newhoo.*/* search
SEARCHENGINE http://*netfind.*/* query,search,s
SEARCHENGINE http://*/netfind* query
SEARCHENGINE http://*/pursuit query
#
# Robots
#
ROBOTINCLUDE REGEXPI:robot
ROBOTINCLUDE REGEXPI:spider
ROBOTINCLUDE REGEXPI:crawler
ROBOTINCLUDE REGEXPI:spammer
ROBOTINCLUDE REGEXPI:validator
ROBOTINCLUDE Googlebot*
ROBOTINCLUDE Infoseek*
ROBOTINCLUDE Inktomi*
ROBOTINCLUDE Scooter*
ROBOTINCLUDE Slurp*
ROBOTINCLUDE Ultraseek*
ROBOTINCLUDE Spider
#
# End of mancnf.cfg