I have an external "rewritemaps.config" file with the following entries:
<rewriteMaps>
<rewriteMap name="Cat1">
<add value="2" key="wheels-and-castors" />
<add value="3" key="agricultural" />
<add value="4" key="hydraulic-hose-and-fittings" />
<add value="5" key="engineering-plastics" />
<add value="6" key="sealing-and-jointing" />
<add value="7" key="health-and-safety" />
<add value="8" key="hose-and-ducting" />
<add value="9" key="hose-couplings-and-clamps" />
<add value="10" key="general-consumables" />
<add value="11" key="ceramics" />
<add value="12" key="rubber-and-polyurethane" />
</rewriteMap>
</rewriteMaps>
How do I rewrite URL's such as:
http://www.domain.com/wheels-and-castors/
to the following:
http://www.domain.com/category.asp?catID=2
Note, this is a rewrite, and not a redirect.
I think I'm almost there with the final rewrite line, but I'm struggling on the match and conditions. My current final rewrite line is:
<action type="Rewrite" url="category.asp?catID={Cat1:{C:1}}" appendQueryString="false" />
(where Cat1 is the name of my rewriteMap)
Thanks for any help. Cheers, Chris.
Ok, after a good week of researching and trial & error, I've got it working. Here's the complete rule I'm using, which references an external "rewritemaps.config".
<rule name="Match Category URLs" stopProcessing="true">
<match url="^(?![^/]+?\.(?:css|png|jpg|axd)$)([^/]+)/?$" />
<conditions>
<add input="{URL}" pattern="(.asp|.xml)" negate="true" />
</conditions>
<action type="Rewrite" url="category.asp?catID={CatMap:{R:1}}" appendQueryString="false" />
</rule>
Works perfectly now.
PS. I renamed the rewriteMap to "CatMap" (to avoid anyone getting confused, if following this).
Related
I have web.config for asp.net core configured like this to block bots MJ12bot|spbot|YandexBot
I'm using IIS 7.5 with Url Rewrite Module 2.1 installed.
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<system.webServer>
<handlers>
<add name="aspNetCore" path="*" verb="*" modules="AspNetCoreModule" resourceType="Unspecified" />
</handlers>
<aspNetCore processPath="dotnet" arguments=".\myproject.dll" stdoutLogEnabled="false" stdoutLogFile=".\logs\stdout">
</aspNetCore>
<rewrite>
<rules>
<rule name="RequestBlockingRule1" patternSyntax="Wildcard" stopProcessing="true">
<match url="*" />
<conditions logicalGrouping="MatchAny">
<add input="{HTTP_USER_AGENT}" pattern="MJ12bot|spbot|YandexBot" />
</conditions>
<action type="CustomResponse" statusCode="403" statusReason="Forbidden: Access is denied." statusDescription="You do not have permission to view this directory or page using the credentials that you supplied." />
</rule>
</rules>
</rewrite>
</system.webServer>
</configuration>
I tried to delete the website logs file and restart the app pool to monitor the new traffic, the bot is still crawling on my site.
2017-07-01 14:29:52 W3SVC33 125.212.217.6 GET /phim/mune-ve-binh-mat-trang-hoat-hinh q=HD2 80 - 144.76.30.241 Mozilla/5.0+(compatible;+MJ12bot/v1.4.7;+http://mj12bot.com/) 200 0 0 7888 403 21995
2017-07-01 14:30:10 W3SVC33 125.212.217.6 GET /phim/tay-du-ky-1-dai-nao-thien-cung q=HD1 80 - 5.9.63.162 Mozilla/5.0+(compatible;+MJ12bot/v1.4.7;+http://mj12bot.com/) 200 0 0 8278 401 33541
2017-07-01 14:30:13 W3SVC33 125.212.217.6 GET /phim/su-that-kinh-hoang q=HD2 80 - 5.9.156.74 Mozilla/5.0+(compatible;+MJ12bot/v1.4.7;+http://mj12bot.com/) 200 0 0 8425 389 19474
I use this rewrite rule, maybe it will work for you. I did not create it myself, found it at https://www.saotn.org/hackrepair-bad-bots-htaccess-web-config-iis/
<rule name="Abuse User Agents Blocking" stopProcessing="true">
<match url=".*" ignoreCase="false" />
<conditions logicalGrouping="MatchAny">
<add input="{HTTP_USER_AGENT}" pattern="^.*(1Noonbot|1on1searchBot|3D_SEARCH|3DE_SEARCH2|3GSE|50.nu|192.comAgent|360Spider|A6-Indexer|AASP|ABACHOBot|Abonti|abot|AbotEmailSearch|Aboundex|AboutUsBot|AccMonitor\ Compliance|accoona|AChulkov.NET\ page\ walker|Acme.Spider|AcoonBot|acquia-crawler|ActiveTouristBot|Acunetix|Ad\ Muncher|AdamM|adbeat_bot|adminshop.com|Advanced\ Email|AESOP_com_SpiderMan|AESpider|AF\ Knowledge\ Now\ Verity|aggregator:Vocus|ah-ha.com|AhrefsBot|AIBOT|aiHitBot|aipbot|AISIID|AITCSRobot|Akamai-SiteSnapshot|AlexaWebSearchPlatform|AlexfDownload|Alexibot|AlkalineBOT|All\ Acronyms|Amfibibot|AmPmPPC.com|AMZNKAssocBot|Anemone|Anonymous|Anonymouse.org|AnotherBot|AnswerBot|AnswerBus|AnswerChase\ PROve|AntBot|antibot-|AntiSantyWorm|Antro.Net|AONDE-Spider|Aport|Aqua_Products|AraBot|Arachmo|Arachnophilia|archive.org_bot|aria\ eQualizer|arianna.libero.it|Arikus_Spider|Art-Online.com|ArtavisBot|Artera|ASpider|ASPSeek|asterias|AstroFind|athenusbot|AtlocalBot|Atomic_Email_Hunter|attach|attrakt|attributor|Attributor.comBot|augurfind|AURESYS|AutoBaron|autoemailspider|autowebdir|AVSearch-|axfeedsbot|Axonize-bot|Ayna|b2w|BackDoorBot|BackRub|BackStreet\ Browser|BackWeb|Baiduspider-video|Bandit|BatchFTP|baypup|BDFetch|BecomeBot|BecomeJPBot|BeetleBot|Bender|besserscheitern-crawl|betaBot|Big\ Brother|Big\ Data|Bigado.com|BigCliqueBot|Bigfoot|BIGLOTRON|Bilbo|BilgiBetaBot|BilgiBot|binlar|bintellibot|bitlybot|BitvoUserAgent|Bizbot003|BizBot04|BizBot04\ kirk.overleaf.com|Black.Hole|Black\ Hole|Blackbird|BlackWidow|bladder\ fusion|Blaiz-Bee|BLEXBot|Blinkx|BlitzBOT|Blog\ Conversation\ Project|BlogMyWay|BlogPulseLive|BlogRefsBot|BlogScope|Blogslive|BloobyBot|BlowFish|BLT|bnf.fr_bot|BoaConstrictor|BoardReader-Image-Fetcher|BOI_crawl_00|BOIA-Scan-Agent|BOIA.ORG-Scan-Agent|boitho.com-dc|Bookmark\ Buddy|bosug|Bot\ Apoena|BotALot|BotRightHere|Botswana|bottybot|BpBot|BRAINTIME_SEARCH|BrokenLinkCheck.com|BrowserEmulator|BrowserMob|BruinBot|BSearchR&D|BSpider|btbot|Btsearch|Buddy|Buibui|BuildCMS|BuiltBotTough|Bullseye|bumblebee|BunnySlippers|BuscadorClarin|Butterfly|BuyHawaiiBot|BuzzBot|byindia|BySpider|byteserver|bzBot|c\ r\ a\ w\ l\ 3\ r|CacheBlaster|CACTVS\ Chemistry|Caddbot|Cafi|Camcrawler|CamelStampede|Canon-WebRecord|Canon-WebRecordPro|CareerBot|casper|cataguru|CatchBot|CazoodleBot|CCBot|CCGCrawl|ccubee|CD-Preload|CE-Preload|Cegbfeieh|Cerberian\ Drtrs|CERT\ FigleafBot|cfetch|CFNetwork|Chameleon|ChangeDetection|Charlotte|Check&Get|Checkbot|Checklinks|checkprivacy|CheeseBot|ChemieDE-NodeBot|CherryPicker|CherryPickerElite|CherryPickerSE|Chilkat|ChinaClaw|CipinetBot|cis455crawler|citeseerxbot|cizilla.com|ClariaBot|clshttp|Clushbot|cmsworldmap|coccoc|CollapsarWEB|Collector|combine|comodo|conceptbot|ConnectSearch|conpilot|ContentSmartz|ContextAd|contype|cookieNET|CoolBott|CoolCheck|Copernic|Copier|CopyRightCheck|core-project|cosmos|Covario-IDS|Cowbot-|Cowdog|crabbyBot|crawl|Crawl_Application|crawl.UserAgent|CrawlConvera|crawler|crawler_for_infomine|CRAWLER-ALTSE.VUNET.ORG-Lynx|crawler-upgrade-config|crawler.kpricorn.org|crawler#|crawler4j|crawler43.ejupiter.com|Crawly|CreativeCommons|Crescent|Crescent\ Internet\ ToolPak\ HTTP\ OLE\ Control|cs-crawler|CSE\ HTML\ Validator|CSHttpClient|Cuasarbot|culsearch|Curl|Custo|Cutbot|cvaulev|Cyberdog|CyberNavi_WebGet|CyberSpyder|CydralSpider).*$" />
<add input="{HTTP_USER_AGENT}" pattern="^.*(D1GArabicEngine|DA|DataCha0s|DataFountains|DataparkSearch|DataSpearSpiderBot|DataSpider|Dattatec.com|Dattatec.com-Sitios-Top|Daumoa|DAUMOA-video|DAUMOA-web|Declumbot|Deepindex|deepnet|DeepTrawl|dejan|del.icio.us-thumbnails|DelvuBot|Deweb|DiaGem|Diamond|DiamondBot|diavol|DiBot|didaxusbot|DigExt|Digger|DiGi-RSSBot|DigitalArchivesBot|DigOut4U|DIIbot|Dillo|Dir_Snatch.exe|DISCo|DISCo\ Pump|discobot|DISCoFinder|Distilled-Reputation-Monitor|Dit|DittoSpyder|DjangoTraineeBot|DKIMRepBot|DoCoMo|DOF-Verify|domaincrawler|DomainScan|DomainWatcher|dotbot|DotSpotsBot|Dow\ Jonesbot|Download|Download\ Demon|Downloader|DOY|dragonfly|Drip|drone|DTAAgent|dtSearchSpider|dumbot|Dwaar|Dwaarbot|DXSeeker|EAH|EasouSpider|EasyDL|ebingbong|EC2LinkFinder|eCairn-Grabber|eCatch|eChooseBot|ecxi|EdisterBot|EduGovSearch|egothor|eidetica.com|EirGrabber|ElisaBot|EllerdaleBot|EMail\ Exractor|EmailCollector|EmailLeach|EmailSiphon|EmailWolf|EMPAS_ROBOT|EnaBot|endeca|EnigmaBot|Enswer\ Neuro|EntityCubeBot|EroCrawler|eStyleSearch|eSyndiCat|Eurosoft-Bot|Evaal|Eventware|Everest-Vulcan|Exabot|Exabot-Images|Exabot-Test|Exabot-XXX|ExaBotTest|ExactSearch|exactseek.com|exooba|Exploder|explorersearch|extract|Extractor|ExtractorPro|EyeNetIE|ez-robot|Ezooms|factbot|FairAd\ Client|falcon|Falconsbot|fast-search-engine|FAST\ Data\ Document|FAST\ ESP|fastbot|fastbot.de|FatBot|Favcollector|Faviconizer|FDM|FedContractorBot|feedfinder|FelixIDE|fembot|fetch_ici|Fetch\ API\ Request|fgcrawler|FHscan|fido|Filangy|FileHound|FindAnISP.com_ISP_Finder|findlinks|FindWeb|Firebat|Fish-Search-Robot|Flaming\ AttackBot|Flamingo_SearchEngine|FlashCapture|FlashGet|flicky|FlickySearchBot|flunky|focused_crawler|FollowSite|Foobot|Fooooo_Web_Video_Crawl|Fopper|FormulaFinderBot|Forschungsportal|fr_crawler|Francis|Freecrawl|FreshDownload|freshlinks.exe|FriendFeedBot|frodo.at|froGgle|FrontPage|Froola|FU-NBI|full_breadth_crawler|FunnelBack|FunWebProducts|FurlBot|g00g1e|G10-Bot|Gaisbot|GalaxyBot|gazz|gcreep|generate_infomine_category_classifiers|genevabot|genieBot|GenieBotRD_SmallCrawl|Genieo|Geomaxenginebot|geometabot|GeonaBot|GeoVisu|GermCrawler|GetHTMLContents|Getleft|GetRight|GetSmart|GetURL.rexx|GetWeb!|Giant|GigablastOpenSource|Gigabot|Girafabot|GleameBot|gnome-vfs|Go-Ahead-Got-It|Go!Zilla|GoForIt.com|GOFORITBOT|gold|Golem|GoodJelly|Gordon-College-Google-Mini|goroam|GoSeebot|gotit|Govbot|GPU\ p2p|grab|Grabber|GrabNet|Grafula|grapeFX|grapeshot|GrapeshotCrawler|grbot|GreenYogi\ [ZSEBOT]|Gromit|GroupMe|grub|grub-client|Grubclient-|GrubNG|GruBot|gsa|GSLFbot|GT::WWW|Gulliver|GulperBot|GurujiBot|GVC|GVC\ BUSINESS|gvcbot.com|HappyFunBot|harvest|HarvestMan|Hatena\ Antenna|Hawler|Hazel's\ Ferret\ hopper|hcat|hclsreport-crawler|HD\ nutch\ agent|Header_Test_Client|healia|Helix|heritrix|hijbul-heritrix-crawler|HiScan|HiSoftware\ AccMonitor|HiSoftware\ AccVerify|hitcrawler_|hivaBot|hloader|HMSEbot|HMView|hoge|holmes|HomePageSearch|Hooblybot-Image|HooWWWer|Hostcrawler|HSFT\ -\ Link|HSFT\ -\ LVU|HSlide|ht:|htdig|Html\ Link\ Validator|HTMLParser|HTTP::Lite|httplib|HTTrack|Huaweisymantecspider|hul-wax|humanlinks|HyperEstraier|Hyperix).*$" />
<add input="{HTTP_USER_AGENT}" pattern="^.*(ia_archiver|IAArchiver-|ibuena|iCab|ICDS-Ingestion|ichiro|iCopyright\ Conductor|id-search|IDBot|IEAutoDiscovery|IECheck|iHWebChecker|IIITBOT|iim_405|IlseBot|IlTrovatore|Iltrovatore-Setaccio|ImageBot|imagefortress|ImagesHereImagesThereImagesEverywhere|ImageVisu|imds_monitor|imo-google-robot-intelink|IncyWincy|Industry\ Cortexcrawler|Indy\ Library|indylabs_marius|InelaBot|Inet32\ Ctrl|inetbot|InfoLink|INFOMINE|infomine.ucr.edu|InfoNaviRobot|Informant|Infoseek|InfoTekies|InfoUSABot|INGRID|Inktomi|InsightsCollector|InsightsWorksBot|InspireBot|InsumaScout|Intelix|InterGET|Internet\ Ninja|InternetLinkAgent|Interseek|IOI|ip-web-crawler.com|Ipselonbot|Iria|IRLbot|Iron33|Isara|iSearch|iSiloX|IsraeliSearch|IstellaBot|its-learning|IU_CSCI_B659_class_crawler|iVia|iVia\ Page\ Fetcher|JadynAve|JadynAveBot|jakarta|Jakarta\ Commons-HttpClient|Java|Jbot|JemmaTheTourist|JennyBot|Jetbot|JetBrains\ Omea\ Pro|JetCar|Jim|JoBo|JobSpider_BA|JOC|JoeDog|JoyScapeBot|JSpyda|JubiiRobot|jumpstation|Junut|JustView|Jyxobot|K.S.Bot|KakcleBot|kalooga|KaloogaBot|kanagawa|KATATUDO-Spider|Katipo|kbeta1|Kenjin.Spider|KeywenBot|Keyword.Density|Keyword\ Density|kinjabot|KIT-Fireball|Kitenga-crawler-bot|KiwiStatus|kmbot-|kmccrew|Knight|KnowItAll|Knowledge.com|Knowledge\ Engine|KoepaBot|Koninklijke|KrOWLer|KSbot|kuloko-bot|kulturarw3|KummHttp|Kurzor|Kyluka|L.webis|LabelGrab|Labhoo|labourunions411|lachesis|Lament|LamerExterminator|LapozzBot|larbin|LARBIN-EXPERIMENTAL|LBot|LeapTag|LeechFTP|LeechGet|LetsCrawl.com|LexiBot|LexxeBot|lftp|libcrawl|libiViaCore|libWeb|libwww|libwww-perl|likse|Linguee|Link|link_checker|LinkAlarm|linkbot|LinkCheck\ by\ Siteimprove.com|LinkChecker|linkdex.com|LinkextractorPro|LinkLint|linklooker|Linkman|LinkScan|LinksCrawler|LinksManager.com_bot|LinkSweeper|linkwalker|LiteFinder|LitlrBot|Little\ Grabber\ at\ Skanktale.com|Livelapbot|LM\ Harvester|LMQueueBot|LNSpiderguy|LoadTimeBot|LocalcomBot|locust|LolongBot|LookBot|Lsearch|lssbot|LWP|lwp-request|lwp-trivial|LWP::Simple|Lycos_Spider|Lydia\ Entity|LynnBot|Lytranslate|Mag-Net|Magnet|magpie-crawler|Magus|Mail.Ru|Mail.Ru_Bot|MAINSEEK_BOT|Mammoth|MarkWatch|MaSagool|masidani_bot_|Mass|Mata.Hari|Mata\ Hari|matentzn\ at\ cs\ dot\ man\ dot\ ac\ dot\ uk|maxamine.com--robot|maxamine.com-robot|maxomobot|Maxthon$|McBot|MediaFox|medrabbit|Megite|MemacBot|Memo|MendeleyBot|Mercator-|mercuryboard_user_agent_sql_injection.nasl|MerzScope|metacarta|Metager2|metager2-verification-bot|MetaGloss|METAGOPHER|metal|metaquerier.cs.uiuc.edu|METASpider|Metaspinner|MetaURI|MetaURI\ API|MFC_Tear_Sample|MFcrawler|MFHttpScan|Microsoft.URL|MIIxpc|miner|mini-robot|minibot|miniRank|Mirror|Missigua\ Locator|Mister.PiX|Mister\ PiX|Miva|MJ12bot|mnoGoSearch|mod_accessibility|moduna.com|moget|MojeekBot|MOMspider|MonkeyCrawl|MOSES|Motor|mowserbot|MQbot|MSE360|MSFrontPage|MSIECrawler|MSIndianWebcrawl|MSMOBOT|Msnbot|msnbot-products|MSNPTC|MSRBOT|MT-Soft|MultiText|My_Little_SearchEngine_Project|my-heritrix-crawler|MyApp|MYCOMPANYBOT|mycrawler|MyEngines-US-Bot|MyFamilyBot|Myra|nabot|nabot_|Najdi.si|Nambu|NAMEPROTECT|NatchCVS|naver|naverbookmarkcrawler|NaverBot|Navroad|NearSite|NEC-MeshExplorer|NeoScioCrawler|NerdByNature.Bot|NerdyBot|Nerima-crawl-).*$" />
<add input="{HTTP_USER_AGENT}" pattern="^.*(T-H-U-N-D-E-R-S-T-O-N-E|Tailrank|tAkeOut|TAMU_CRAWLER|TapuzBot|Tarantula|targetblaster.com|TargetYourNews.com|TAUSDataBot|taxinomiabot|Tecomi|TeezirBot|Teleport|Teleport\ Pro|TeleportPro|Telesoft|Teradex\ Mapper|TERAGRAM_CRAWLER|TerrawizBot|testbot|testing\ of|TextBot|thatrobotsite.com|The.Intraformant|The\ Dyslexalizer|The\ Intraformant|TheNomad|Theophrastus|theusefulbot|TheUsefulbot_|ThumbBot|thumbshots-de-bot|tigerbot|TightTwatBot|TinEye|Titan|to-dress_ru_bot_|to-night-Bot|toCrawl|Topicalizer|topicblogs|Toplistbot|TopServer\ PHP|topyx-crawler|Touche|TourlentaScanner|TPSystem|TRAAZI|TranSGeniKBot|travel-search|TravelBot|TravelLazerBot|Treezy|TREX|TridentSpider|Trovator|True_Robot|tScholarsBot|TsWebBot|TulipChain|turingos|turnit|TurnitinBot|TutorGigBot|TweetedTimes|TweetmemeBot|TwengaBot|TwengaBot-Discover|Twiceler|Twikle|twinuffbot|Twisted\ PageGetter|Twitturls|Twitturly|TygoBot|TygoProwler|Typhoeus|U.S.\ Government\ Printing\ Office|uberbot|ucb-nutch|UCSD-Crawler|UdmSearch|UFAM-crawler-|Ultraseek|UnChaos|unchaos_crawler_|UnisterBot|UniversalSearch|UnwindFetchor|UofTDB_experiment|updated|URI::Fetch|url_gather|URL-Checker|URL\ Control|URLAppendBot|URLBlaze|urlchecker|urlck|UrlDispatcher|urllib|URLSpiderPro|URLy.Warning|USAF\ AFKN\|usasearch|USS-Cosmix|USyd-NLP-Spider|Vacobot|Vacuum|VadixBot|Vagabondo|Validator|Valkyrie|vBSEO|VCI|VerbstarBot|VeriCiteCrawler|Verifactrola|Verity-URL-Gateway|vermut|versus|versus.integis.ch|viasarchivinginformation.html|vikspider|VIP|VIPr|virus-detector|VisBot|Vishal\ For\ CLIA|VisWeb|vlad|vlsearch|VMBot|VocusBot|VoidEYE|VoilaBot|Vortex|voyager|voyager-hc|voyager-partner-deep|VSE|vspider).*$" />
<add input="{HTTP_USER_AGENT}" pattern="^.*(W3C_Unicorn|W3C-WebCon|w3m|w3search|wacbot|wastrix|Water\ Conserve|Water\ Conserve\ Portal|WatzBot|wauuu\ engine|Wavefire|Waypath|Wazzup|Wazzup1.0.4800|wbdbot|web-agent|Web-Sniffer|Web.Image.Collector|Web\ CEO\ Online|Web\ Image\ Collector|Web\ Link\ Validator|Web\ Magnet|webalta|WebaltBot|WebAuto|webbandit|webbot|webbul-bot|WebCapture|webcheck|Webclipping.com|webcollage|WebCopier|WebCopy|WebCorp|webcrawl.net|webcrawler|WebDownloader\ for|Webdup|WebEMailExtrac|WebEMailExtrac.*|WebEnhancer|WebFerret|webfetch|WebFetcher|WebGather|WebGo\ IS|webGobbler|WebImages|Webinator-search2.fasthealth.com|Webinator-WBI|WebIndex|WebIndexer|weblayers|WebLeacher|WeblexBot|WebLinker|webLyzard|WebmasterCoffee|WebmasterWorld|WebmasterWorldForumBot|WebMiner|WebMoose|WeBot|WebPix|WebReaper|WebRipper|WebSauger|Webscan|websearchbench|WebSite|websitemirror|WebSpear|websphinx.test|WebSpider|Webster|Webster.Pro|Webster\ Pro|WebStripper|WebTrafficExpress|WebTrends\ Link\ Analyzer|webvac|webwalk|WebWalker|Webwasher|WebWatch|WebWhacker|WebXM|WebZip|Weddings.info|wenbin|WEPA|WeRelateBot|Whacker|Widow|WikiaBot|Wikio|wikiwix-bot-|WinHttp.WinHttpRequest|WinHTTP\ Example|WIRE|wired-digital-newsbot|WISEbot|WISENutbot|wish-la|wish-project|wisponbot|WMCAI-robot|wminer|WMSBot|woriobot|worldshop|WorQmada|Wotbox|WPScan|wume_crawler|WWW-Mechanize|www.freeloader.com.|WWW\ Collector|WWWOFFLE|wwwrobot|wwwster|WWWWanderer|wwwxref|Wysigot|X-clawler|Xaldon|Xenu|Xenu's|Xerka\ MetaBot|XGET|xirq|XmarksFetch|XoviBot|xqrobot|Y!J|Y!TunnelPro|yacy.net|yacybot|yarienavoir.net|Yasaklibot|yBot|YebolBot|yellowJacket|yes|YesupBot|Yeti|YioopBot|YisouSpider|yolinkBot|yoogliFetchAgent|yoono|Yoriwa|YottaCars_Bot|you-dir|Z-Add\ Link|zagrebin|Zao|zedzo.digest|zedzo.validate|zermelo|Zeus|Zeus\ Link\ Scout|zibber-v|zimeno|Zing-BottaBot|ZipppBot|zmeu|ZoomSpider|ZuiBot|ZumBot|Zyborg|Zyte).*$" />
<add input="{HTTP_USER_AGENT}" pattern="^.*(Nessus|NESSUS::SOAP|nestReader|Net::Trackback|NetAnts|NetCarta\ CyberPilot\ Pro|Netcraft|NetID.com|NetMechanic|Netprospector|NetResearchServer|NetScoop|NetSeer|NetShift=|NetSongBot|Netsparker|NetSpider|NetSrcherP|NetZip|NetZip-Downloader|NewMedhunt|news|News_Search_App|NewsGatherer|Newsgroupreporter|NewsTroveBot|NextGenSearchBot|nextthing.org|NHSEWalker|nicebot|NICErsPRO|niki-bot|NimbleCrawler|nimbus-1|ninetowns|Ninja|NjuiceBot|NLese|Nogate|Nomad-V2.x|NoteworthyBot|NPbot|NPBot-|NRCan\ intranet|NSDL_Search_Bot|nu_tch-princeton|nuggetize.com|nutch|nutch1|NutchCVS|NutchOrg|NWSpider|Nymesis|nys-crawler|ObjectsSearch|oBot|Obvius\ external\ linkcheck|Occam|Ocelli|Octopus|ODP\ entries|Offline.Explorer|Offline\ Explorer|Offline\ Navigator|OGspider|OmiExplorer_Bot|OmniExplorer_Bot|omnifind|OmniWeb|OnetSzukaj|online\ link\ validator|OOZBOT|Openbot|Openfind|Openfind\ data|OpenHoseBot|OpenIntelligenceData|OpenISearch|OpenSearchServer_Bot|OpiDig|optidiscover|OrangeBot|ORISBot|ornl_crawler_1|ORNL_Mercury|osis-project.jp|OsO|OutfoxBot|OutfoxMelonBot|OWLER-BOT|owsBot|ozelot|P3P\ Client|page_verifier|PageBitesHyperBot|Pagebull|PageDown|PageFetcher|PageGrabber|PagePeeker|PageRank\ Monitor|pamsnbot.htm|Panopy|panscient.com|Pansophica|Papa\ Foto|PaperLiBot|parasite|parsijoo|Pathtraq|Pattern|Patwebbot|pavuk|PaxleFramework|PBBOT|pcBrowser|pd-crawler|PECL::HTTP|penthesila|PeoplePal|perform_crawl|PerMan|PGP-KA|PHPCrawl|PhpDig|PicoSearch|pipBot|pipeLiner|Pita|pixfinder|PiyushBot|planetwork|PleaseCrawl|Plucker|Plukkie|Plumtree|Pockey|Pockey-GetHTML|PoCoHTTP|pogodak.ba|Pogodak.co.yu|Poirot|polybot|Pompos|Poodle\ predictor|PopScreenBot|PostPost|PrivacyFinder|ProjectWF-java-test-crawler|ProPowerBot|ProWebWalker|PROXY|psbot|psbot-page|PSS-Bot|psycheclone|pub-crawler|pucl|pulseBot\ \(pulse|Pump|purebot|PWeBot|pycurl|Python-urllib|pythonic-crawler|PythonWikipediaBot|q1|QEAVis\ agent|QFKBot|qualidade|Qualidator.com|QuepasaCreep|QueryN.Metasearch|QueryN\ Metasearch|quest.durato|Quintura-Crw|QunarBot|Qweery_robot.txt_CheckBot|QweeryBot|r2iBot|R6_CommentReader|R6_FeedFetcher|R6_VoteReader|RaBot|Radian6|radian6_linkcheck|RAMPyBot|RankurBot|RcStartBot|RealDownload|Reaper|REBI-shoveler|Recorder|RedBot|RedCarpet|ReGet|RepoMonkey|RepoMonkey\ Bait|Riddler|RIIGHTBOT|RiseNetBot|RiverglassScanner|RMA|RoboPal|Robosourcer|robot|robotek|robots|Robozilla|rogerBot|Rome\ Client|Rondello|Rotondo|Roverbot|RPT-HTTPClient|rtgibot|RufusBot|Runnk\ online\ rss\ reader|s~stremor-crawler|S2Bot|SafariBookmarkChecker|SaladSpoon|Sapienti|SBIder|SBL-BOT|SCFCrawler|Scich|ScientificCommons.org|ScollSpider|ScooperBot|Scooter|ScoutJet|ScrapeBox|Scrapy|SCrawlTest|Scrubby|scSpider|Scumbot|SeaMonkey$|Search-Channel|Search-Engine-Studio|search.KumKie.com|search.msn.com|search.updated.com|search.usgs.gov|Search\ Publisher|Searcharoo.NET|SearchBlox|searchbot|searchengine|searchhippo.com|SearchIt-Bot|searchmarking|searchmarks|searchmee_v|SearchmetricsBot|searchmining|SearchnowBot_v1|searchpreview|SearchSpider.com|SearQuBot|Seekbot|Seeker.lookseek.com|SeeqBot|seeqpod-vertical-crawler|Selflinkchecker|Semager|semanticdiscovery|Semantifire1|semisearch|SemrushBot|Senrigan|SEOENGWorldBot|SeznamBot|ShablastBot|ShadowWebAnalyzer|Shareaza|Shelob|sherlock|ShopWiki|ShowLinks|ShowyouBot|siclab|silk|Siphon|SiteArchive|SiteCheck-sitecrawl|sitecheck.internetseer.com|SiteFinder|SiteGuardBot|SiteOrbiter|SiteSnagger|SiteSucker|SiteSweeper|SiteXpert|SkimBot|SkimWordsBot|SkreemRBot|skygrid|Skywalker|Sleipnir|slow-crawler|SlySearch|smart-crawler|SmartDownload|Smarte|smartwit.com|Snake|Snapbot|SnapPreviewBot|Snappy|snookit|Snooper|Snoopy|SocialSearcher|SocSciBot|SOFT411\ Directory|sogou|sohu-search|sohu\ agent|Sokitomi|Solbot|sootle|Sosospider|Space\ Bison|Space\ Fung|SpaceBison|SpankBot|spanner|Spatineo\ Monitor\ Controller|special_archiver|SpeedySpider|Sphider|Sphider2|spider|Spider.TerraNautic.net|SpiderEngine|SpiderKU|SpiderMan|Spinn3r|Spinne|sportcrew-Bot|spyder3.microsys.com|sqlmap|Squid-Prefetch|SquidClamAV_Redirector|Sqworm|SrevBot|sslbot|SSM\ Agent|StackRambler|StarDownloader|statbot|statcrawler|statedept-crawler|Steeler|STEGMANN-Bot|stero|Stripper|Stumbler|suchclip|sucker|SumeetBot|SumitBot|SummizeBot|SummizeFeedReader|SuperBot|superbot.com|SuperHTTP|SuperLumin|SuperPagesBot|Supybot|SURF|Surfbot|SurfControl|SurveyBot|suzuran|SWEBot|swish-e|SygolBot|SynapticWalker|Syntryx\ ANT\ Scout\ Chassis\ Pheromone|SystemSearch-robot|Szukacz).*$" />
</conditions>
<action type="CustomResponse" statusCode="403" statusReason="Forbidden" statusDescription="Forbidden" />
</rule>
Your pattern MJ12bot|spbot|YandexBot is a regex pattern but the pattern syntax is configured as Wildcard, so no matches found.
Remove the attribute patternSyntax="Wildcard" from your configuration and replace <match url="*" /> with <match url=".*" /> then it will work as expected.
<rule name="RequestBlockingRule1" stopProcessing="true">
<match url=".*" />
<conditions logicalGrouping="MatchAny">
<add input="{HTTP_USER_AGENT}" pattern="MJ12bot|spbot|YandexBot" />
</conditions>
<action type="CustomResponse" statusCode="403" statusReason="Forbidden: Access is denied." statusDescription="You do not have permission to view this directory or page using the credentials that you supplied." />
</rule>
I have added multiple http bindings to my site like:
http://sub1.domain.com
http://sub2.domain.com
http://sub3.domain.com
http://sub4.domain.com
http://sub5.domain.com
I need to add different query string to those URLs when user hits any of those URLs.
http://sub1.domain.com/?qs=10
http://sub2.domain.com/?qs=15
http://sub3.domain.com/?qs=25
http://sub4.domain.com/?qs=30
http://sub5.domain.com/?qs=50
I'm thinking to keep query string values in appSettings keys. like
<appSettings>
<add key ="sub1" value="10" />
<add key ="sub2" value="15" />
...
</appSettings>
I wrote following rule that appends fixed query string. But it'll append qs=10 for all five URLs. But I'm clueless about making it dynamic.
<rule name="Add query string param" stopProcessing="true">
<match url=".*" />
<conditions>
<add input="{QUERY_STRING}" pattern="qs=10" negate="true" />
<add input="&{QUERY_STRING}" pattern="^(&.+)|^&$" />
</conditions>
<action type="Redirect" url="http://{HTTP_HOST}/{R:0}?qs=10{C:1}" appendQueryString="false" />
</rule>
You should probably look at rewrite maps. I haven't tested it, but it should be something like this:
<rewrite>
<rewriteMaps>
<rewriteMap name="SubDomainQueryStrings">
<add key="sub1.domain.com" value="qs=10" />
<add key="sub2.domain.com" value="qs=15" />
<add key="sub3.domain.com" value="qs=25" />
<add key="sub4.domain.com" value="qs=30" />
<add key="sub5.domain.com" value="qs=50" />
</rewriteMap>
</rewriteMaps>
<rules>
<rule name="Add query string param" stopProcessing="true">
<match url=".*" />
<conditions>
<add input="{QUERY_STRING}" pattern="{SubDomainQueryStrings:{HTTP_HOST}}" negate="true" />
<add input="&{QUERY_STRING}" pattern="^(&.+)|^&$" />
</conditions>
<action type="Redirect" url="{R:0}?{SubDomainQueryStrings:{HTTP_HOST}}{C:1}" appendQueryString="false"/>
</rule>
</rules>
<rewrite>
You need to use a HTTP_HOST condition to check for the subdomain like that:
<add input="{HTTP_HOST}" pattern="^sub1" />
Code for first two subdomains should look like that:
<rule name="Add query string param 10" stopProcessing="true">
<match url=".*" />
<conditions>
<add input="{HTTP_HOST}" pattern="^sub1" />
<add input="{QUERY_STRING}" pattern="qs=10" negate="true" />
<add input="&{QUERY_STRING}" pattern="^(&.+)|^&$" />
</conditions>
<action type="Redirect" url="http://{HTTP_HOST}/{R:0}?qs=10{C:1}" appendQueryString="false" />
</rule>
<rule name="Add query string param 15" stopProcessing="true">
<match url=".*" />
<conditions>
<add input="{HTTP_HOST}" pattern="^sub2" />
<add input="{QUERY_STRING}" pattern="qs=15" negate="true" />
<add input="&{QUERY_STRING}" pattern="^(&.+)|^&$" />
</conditions>
<action type="Redirect" url="http://{HTTP_HOST}/{R:0}?qs=15{C:1}" appendQueryString="false" />
</rule>
I have defined a URL Rewrite rule through IIS. Basically it turns something like this:
Article.aspx?ID=1&FriendlyURL=whatever
INTO
/1/whatever
Please note that Redirection is working right, but URL Rewrite (links within the page) are not being translated unless I am inside the Article.aspx page.
How can I make the Rewrite Rule apply to all the pages instead of only one? I'm posting below the written rules from Web.Config for your reference. Thanks.
<system.webServer>
<rewrite>
<outboundRules>
<rule name="OutboundRewriteUserFriendlyURL1" preCondition="ResponseIsHtml1">
<match filterByTags="A, Form, Img" pattern="^(.*/)Article\.aspx\?ID=([^=&]+)&(?:amp;)?FriendlyURL=([^=&]+)$" />
<action type="Rewrite" value="{R:1}{R:2}/{R:3}/" />
</rule>
<preConditions>
<preCondition name="ResponseIsHtml1">
<add input="{RESPONSE_CONTENT_TYPE}" pattern="^text/html" />
</preCondition>
</preConditions>
</outboundRules>
<rewriteMaps>
<rewriteMap name="Article Rewrite">
<add key="Article.aspx?ID=1&FriendlyURL=whatever" value="/1/whatever" />
</rewriteMap>
</rewriteMaps>
<rules>
<rule name="RedirectUserFriendlyURL1" stopProcessing="true">
<match url="^Article\.aspx$" />
<conditions>
<add input="{REQUEST_METHOD}" pattern="^POST$" negate="true" />
<add input="{QUERY_STRING}" pattern="^ID=([^=&]+)&FriendlyURL=([^=&]+)$" />
</conditions>
<action type="Redirect" url="{C:1}/{C:2}" appendQueryString="false" />
</rule>
<rule name="RewriteUserFriendlyURL1" stopProcessing="true">
<match url="^([^/]+)/([^/]+)/?$" />
<conditions>
<add input="{REQUEST_FILENAME}" matchType="IsFile" negate="true" />
<add input="{REQUEST_FILENAME}" matchType="IsDirectory" negate="true" />
</conditions>
<action type="Rewrite" url="Article.aspx?ID={R:1}&FriendlyURL={R:2}" />
</rule>
</rules>
</rewrite>
</system.webServer>
So I finally had to hard-code the links to be url-friendly by setting the "href" attribute within the code.
Something like this:
<a href='/1/hello-world/'>Read the "Hello World" Article</a>
Thanks.
I like regular expressions problems, try this.
<system.webServer>
<rewrite>
<outboundRules>
<clear />
<rule name="OutboundRewriteUserFriendlyURL1"
preCondition="ResponseIsHtml1">
<match filterByTags="A, Form, Img"
pattern="^(.*/)([^\.]+)\.aspx\?ID=([^=&]+)&(?:amp;)?FriendlyURL=([^=&]+)$" />
<conditions logicalGrouping="MatchAll"
trackAllCaptures="true" />
<action type="Rewrite"
value="{R:1}{R:2}/{R:3}/{R:4}/" />
</rule>
<rule name="OutboundRewriteUserFriendlyURL2"
preCondition="ResponseIsHtml1">
<match filterByTags="A, Form, Img"
pattern="^(.*)\?ID=([^=&]+)&(?:amp;)?FriendlyURL=([^=&]+)$" />
<conditions logicalGrouping="MatchAll"
trackAllCaptures="true" />
<action type="Rewrite"
value="" />
</rule>
<preConditions>
<preCondition name="ResponseIsHtml1">
<add input="{RESPONSE_CONTENT_TYPE}"
pattern="^text/html" />
</preCondition>
</preConditions>
</outboundRules>
<rewriteMaps>
<rewriteMap name="Article Rewrite">
<add key="Article.aspx?ID=1&FriendlyURL=whatever"
value="/1/whatever" />
</rewriteMap>
</rewriteMaps>
<rules>
<rule name="RedirectUserFriendlyURL1"
stopProcessing="true">
<match url="^([^\.]+)\.aspx$" />
<conditions>
<add input="{REQUEST_METHOD}"
pattern="^POST$"
negate="true" />
<add input="{QUERY_STRING}"
pattern="^ID=([^=&]+)&FriendlyURL=([^=&]+)$" />
</conditions>
<action type="Redirect"
url="{R:1}/{C:1}/{C:2}"
appendQueryString="false" />
</rule>
<rule name="RewriteUserFriendlyURL1"
stopProcessing="true">
<match url="^([^/]+)/([^/]+)/([^/]+)/?$" />
<conditions>
<add input="{REQUEST_FILENAME}"
matchType="IsFile"
negate="true" />
<add input="{REQUEST_FILENAME}"
matchType="IsDirectory"
negate="true" />
</conditions>
<action type="Rewrite"
url="{R:1}.aspx?ID={R:2}&FriendlyURL={R:3}" />
</rule>
</rules>
</rewrite>
<urlCompression doStaticCompression="false"
doDynamicCompression="false" />
</system.webServer>
The problem is in your OutboundRewrite rule's regular expression. I suggest you get a regex tool like expresso (my favorite), start with a very simple regex, then add complexity as your situation dictates.
The simplest regex that will match your example is:
Article\.aspx\?ID=(\d)&FriendlyURL=(.*)
Here's an example. Godspeed.
I have an URL like that :
/eng/myfolder/mycategory.aspx
I would like to transform it in something like:
index.aspx?FolderId=2&LanguageId=1&CategoryID=12
In a word I want to "write" that,
eng => languageId=2 and myfolder => FolderId=2 and mycategory.....
I don't know if i make myself clear but
Thanks.
with http://mywebsite.com/fr/default
Right Now i have :
<add key = "/en/default" value = "default.aspx?LanguageId=1" />
<add input = " {Rewrite:{REQUEST_URI}}" pattern = "default.aspx\?(.+)" />
<action type="rewrite" url="default.aspx?{C:1}" appendQueryString="False" />
But i want Something more global like:
<add key = "/en" value = "LanguageId=1" />
<add input = " {Rewrite:{REQUEST_URI}}" pattern = "(.+)" />
< action type="rewrite" url="default.aspx?{C:1}" appendQueryString="False" />
This is how I would do it, basically create three maps, one for languages, folders and cateogories. If they have large data sets and possibly changing frequently then I would consider using custom provider built in C#, but for now just:
<configuration>
<system.webServer>
<rewrite>
<rewriteMaps>
<rewriteMap name="Folders">
<add key="myfolder" value="2" />
<add key="anotherfolder" value="3" />
</rewriteMap>
<rewriteMap name="Languages">
<add key="eng" value="2" />
<add key="spa" value="1" />
</rewriteMap>
<rewriteMap name="Categories">
<add key="mycategory" value="12" />
<add key="anothercategory" value="10" />
</rewriteMap>
</rewriteMaps>
<rules>
<rule name="RewriteURLs" stopProcessing="true">
<match url="([^/]+)/([^/]+)/(.+?)\.aspx" />
<action type="Rewrite" url="/index.aspx?FolderId={Folders:{R:2}}&languageid={Languages:{R:1}}&CategoryID={Categories:{R:3}}" />
</rule>
</rules>
</rewrite>
</system.webServer>
</configuration>
I've written some rules for our static content subdomains so that, when they come into IIS, they are redirected to our www. subdomain.
The reason for this is that we have several subdomains being indexed by Google. However, when I create the urls, I am still able to view files at img1.mydomain.com with the statuscode being 200, rather than 301 as I would expect.
Am I doing something wrong?
<!-- Force img domains and non-www users to point at www. -->
<rule name="redirectImgJsAndNonWww" stopProcessing="true">
<match url=".*" />
<conditions>
<add input="{HTTP_HOST}" pattern="^img1.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^img2.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^img3.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^js1.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^js2.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^js3.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^mydomain.com$" />
</conditions>
<action type="Redirect" url="www.mydomain.com/{R:0}" redirectType="Permanent" />
</rule>
Many thanks for any help.
Update: It appears that I was missing the logicalGrouping flag, which was setting my rules to "MatchAll".
<rule name="Redirect to WWW" stopProcessing="true">
<match url=".*" />
<conditions logicalGrouping="MatchAny">
<add input="{HTTP_HOST}" pattern="^mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^img1.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^img2.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^img3.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^js1.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^js2.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^js3.mydomain.com$" />
</conditions>
<action type="Redirect" url="http://www.mydomain.com/{R:0}" redirectType="Permanent" />
</rule>
The question has now been answered.
It appears that I was missing the logicalGrouping flag, which was setting my rules to "MatchAll".
<rule name="Redirect to WWW" stopProcessing="true">
<match url=".*" />
<conditions logicalGrouping="MatchAny">
<add input="{HTTP_HOST}" pattern="^mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^img1.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^img2.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^img3.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^js1.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^js2.mydomain.com$" />
<add input="{HTTP_HOST}" pattern="^js3.mydomain.com$" />
</conditions>
<action type="Redirect" url="http://www.mydomain.com/{R:0}" redirectType="Permanent" />
</rule>