I have web.config for asp.net core configured like this to block bots MJ12bot|spbot|YandexBot
I'm using IIS 7.5 with Url Rewrite Module 2.1 installed.
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<system.webServer>
<handlers>
<add name="aspNetCore" path="*" verb="*" modules="AspNetCoreModule" resourceType="Unspecified" />
</handlers>
<aspNetCore processPath="dotnet" arguments=".\myproject.dll" stdoutLogEnabled="false" stdoutLogFile=".\logs\stdout">
</aspNetCore>
<rewrite>
<rules>
<rule name="RequestBlockingRule1" patternSyntax="Wildcard" stopProcessing="true">
<match url="*" />
<conditions logicalGrouping="MatchAny">
<add input="{HTTP_USER_AGENT}" pattern="MJ12bot|spbot|YandexBot" />
</conditions>
<action type="CustomResponse" statusCode="403" statusReason="Forbidden: Access is denied." statusDescription="You do not have permission to view this directory or page using the credentials that you supplied." />
</rule>
</rules>
</rewrite>
</system.webServer>
</configuration>
I tried to delete the website logs file and restart the app pool to monitor the new traffic, the bot is still crawling on my site.
2017-07-01 14:29:52 W3SVC33 125.212.217.6 GET /phim/mune-ve-binh-mat-trang-hoat-hinh q=HD2 80 - 144.76.30.241 Mozilla/5.0+(compatible;+MJ12bot/v1.4.7;+http://mj12bot.com/) 200 0 0 7888 403 21995
2017-07-01 14:30:10 W3SVC33 125.212.217.6 GET /phim/tay-du-ky-1-dai-nao-thien-cung q=HD1 80 - 5.9.63.162 Mozilla/5.0+(compatible;+MJ12bot/v1.4.7;+http://mj12bot.com/) 200 0 0 8278 401 33541
2017-07-01 14:30:13 W3SVC33 125.212.217.6 GET /phim/su-that-kinh-hoang q=HD2 80 - 5.9.156.74 Mozilla/5.0+(compatible;+MJ12bot/v1.4.7;+http://mj12bot.com/) 200 0 0 8425 389 19474
I use this rewrite rule, maybe it will work for you. I did not create it myself, found it at https://www.saotn.org/hackrepair-bad-bots-htaccess-web-config-iis/
<rule name="Abuse User Agents Blocking" stopProcessing="true">
<match url=".*" ignoreCase="false" />
<conditions logicalGrouping="MatchAny">
<add input="{HTTP_USER_AGENT}" pattern="^.*(1Noonbot|1on1searchBot|3D_SEARCH|3DE_SEARCH2|3GSE|50.nu|192.comAgent|360Spider|A6-Indexer|AASP|ABACHOBot|Abonti|abot|AbotEmailSearch|Aboundex|AboutUsBot|AccMonitor\ Compliance|accoona|AChulkov.NET\ page\ walker|Acme.Spider|AcoonBot|acquia-crawler|ActiveTouristBot|Acunetix|Ad\ Muncher|AdamM|adbeat_bot|adminshop.com|Advanced\ Email|AESOP_com_SpiderMan|AESpider|AF\ Knowledge\ Now\ Verity|aggregator:Vocus|ah-ha.com|AhrefsBot|AIBOT|aiHitBot|aipbot|AISIID|AITCSRobot|Akamai-SiteSnapshot|AlexaWebSearchPlatform|AlexfDownload|Alexibot|AlkalineBOT|All\ Acronyms|Amfibibot|AmPmPPC.com|AMZNKAssocBot|Anemone|Anonymous|Anonymouse.org|AnotherBot|AnswerBot|AnswerBus|AnswerChase\ PROve|AntBot|antibot-|AntiSantyWorm|Antro.Net|AONDE-Spider|Aport|Aqua_Products|AraBot|Arachmo|Arachnophilia|archive.org_bot|aria\ eQualizer|arianna.libero.it|Arikus_Spider|Art-Online.com|ArtavisBot|Artera|ASpider|ASPSeek|asterias|AstroFind|athenusbot|AtlocalBot|Atomic_Email_Hunter|attach|attrakt|attributor|Attributor.comBot|augurfind|AURESYS|AutoBaron|autoemailspider|autowebdir|AVSearch-|axfeedsbot|Axonize-bot|Ayna|b2w|BackDoorBot|BackRub|BackStreet\ Browser|BackWeb|Baiduspider-video|Bandit|BatchFTP|baypup|BDFetch|BecomeBot|BecomeJPBot|BeetleBot|Bender|besserscheitern-crawl|betaBot|Big\ Brother|Big\ Data|Bigado.com|BigCliqueBot|Bigfoot|BIGLOTRON|Bilbo|BilgiBetaBot|BilgiBot|binlar|bintellibot|bitlybot|BitvoUserAgent|Bizbot003|BizBot04|BizBot04\ kirk.overleaf.com|Black.Hole|Black\ Hole|Blackbird|BlackWidow|bladder\ fusion|Blaiz-Bee|BLEXBot|Blinkx|BlitzBOT|Blog\ Conversation\ Project|BlogMyWay|BlogPulseLive|BlogRefsBot|BlogScope|Blogslive|BloobyBot|BlowFish|BLT|bnf.fr_bot|BoaConstrictor|BoardReader-Image-Fetcher|BOI_crawl_00|BOIA-Scan-Agent|BOIA.ORG-Scan-Agent|boitho.com-dc|Bookmark\ Buddy|bosug|Bot\ Apoena|BotALot|BotRightHere|Botswana|bottybot|BpBot|BRAINTIME_SEARCH|BrokenLinkCheck.com|BrowserEmulator|BrowserMob|BruinBot|BSearchR&D|BSpider|btbot|Btsearch|Buddy|Buibui|BuildCMS|BuiltBotTough|Bullseye|bumblebee|BunnySlippers|BuscadorClarin|Butterfly|BuyHawaiiBot|BuzzBot|byindia|BySpider|byteserver|bzBot|c\ r\ a\ w\ l\ 3\ r|CacheBlaster|CACTVS\ Chemistry|Caddbot|Cafi|Camcrawler|CamelStampede|Canon-WebRecord|Canon-WebRecordPro|CareerBot|casper|cataguru|CatchBot|CazoodleBot|CCBot|CCGCrawl|ccubee|CD-Preload|CE-Preload|Cegbfeieh|Cerberian\ Drtrs|CERT\ FigleafBot|cfetch|CFNetwork|Chameleon|ChangeDetection|Charlotte|Check&Get|Checkbot|Checklinks|checkprivacy|CheeseBot|ChemieDE-NodeBot|CherryPicker|CherryPickerElite|CherryPickerSE|Chilkat|ChinaClaw|CipinetBot|cis455crawler|citeseerxbot|cizilla.com|ClariaBot|clshttp|Clushbot|cmsworldmap|coccoc|CollapsarWEB|Collector|combine|comodo|conceptbot|ConnectSearch|conpilot|ContentSmartz|ContextAd|contype|cookieNET|CoolBott|CoolCheck|Copernic|Copier|CopyRightCheck|core-project|cosmos|Covario-IDS|Cowbot-|Cowdog|crabbyBot|crawl|Crawl_Application|crawl.UserAgent|CrawlConvera|crawler|crawler_for_infomine|CRAWLER-ALTSE.VUNET.ORG-Lynx|crawler-upgrade-config|crawler.kpricorn.org|crawler#|crawler4j|crawler43.ejupiter.com|Crawly|CreativeCommons|Crescent|Crescent\ Internet\ ToolPak\ HTTP\ OLE\ Control|cs-crawler|CSE\ HTML\ Validator|CSHttpClient|Cuasarbot|culsearch|Curl|Custo|Cutbot|cvaulev|Cyberdog|CyberNavi_WebGet|CyberSpyder|CydralSpider).*$" />
<add input="{HTTP_USER_AGENT}" pattern="^.*(D1GArabicEngine|DA|DataCha0s|DataFountains|DataparkSearch|DataSpearSpiderBot|DataSpider|Dattatec.com|Dattatec.com-Sitios-Top|Daumoa|DAUMOA-video|DAUMOA-web|Declumbot|Deepindex|deepnet|DeepTrawl|dejan|del.icio.us-thumbnails|DelvuBot|Deweb|DiaGem|Diamond|DiamondBot|diavol|DiBot|didaxusbot|DigExt|Digger|DiGi-RSSBot|DigitalArchivesBot|DigOut4U|DIIbot|Dillo|Dir_Snatch.exe|DISCo|DISCo\ Pump|discobot|DISCoFinder|Distilled-Reputation-Monitor|Dit|DittoSpyder|DjangoTraineeBot|DKIMRepBot|DoCoMo|DOF-Verify|domaincrawler|DomainScan|DomainWatcher|dotbot|DotSpotsBot|Dow\ Jonesbot|Download|Download\ Demon|Downloader|DOY|dragonfly|Drip|drone|DTAAgent|dtSearchSpider|dumbot|Dwaar|Dwaarbot|DXSeeker|EAH|EasouSpider|EasyDL|ebingbong|EC2LinkFinder|eCairn-Grabber|eCatch|eChooseBot|ecxi|EdisterBot|EduGovSearch|egothor|eidetica.com|EirGrabber|ElisaBot|EllerdaleBot|EMail\ Exractor|EmailCollector|EmailLeach|EmailSiphon|EmailWolf|EMPAS_ROBOT|EnaBot|endeca|EnigmaBot|Enswer\ Neuro|EntityCubeBot|EroCrawler|eStyleSearch|eSyndiCat|Eurosoft-Bot|Evaal|Eventware|Everest-Vulcan|Exabot|Exabot-Images|Exabot-Test|Exabot-XXX|ExaBotTest|ExactSearch|exactseek.com|exooba|Exploder|explorersearch|extract|Extractor|ExtractorPro|EyeNetIE|ez-robot|Ezooms|factbot|FairAd\ Client|falcon|Falconsbot|fast-search-engine|FAST\ Data\ Document|FAST\ ESP|fastbot|fastbot.de|FatBot|Favcollector|Faviconizer|FDM|FedContractorBot|feedfinder|FelixIDE|fembot|fetch_ici|Fetch\ API\ Request|fgcrawler|FHscan|fido|Filangy|FileHound|FindAnISP.com_ISP_Finder|findlinks|FindWeb|Firebat|Fish-Search-Robot|Flaming\ AttackBot|Flamingo_SearchEngine|FlashCapture|FlashGet|flicky|FlickySearchBot|flunky|focused_crawler|FollowSite|Foobot|Fooooo_Web_Video_Crawl|Fopper|FormulaFinderBot|Forschungsportal|fr_crawler|Francis|Freecrawl|FreshDownload|freshlinks.exe|FriendFeedBot|frodo.at|froGgle|FrontPage|Froola|FU-NBI|full_breadth_crawler|FunnelBack|FunWebProducts|FurlBot|g00g1e|G10-Bot|Gaisbot|GalaxyBot|gazz|gcreep|generate_infomine_category_classifiers|genevabot|genieBot|GenieBotRD_SmallCrawl|Genieo|Geomaxenginebot|geometabot|GeonaBot|GeoVisu|GermCrawler|GetHTMLContents|Getleft|GetRight|GetSmart|GetURL.rexx|GetWeb!|Giant|GigablastOpenSource|Gigabot|Girafabot|GleameBot|gnome-vfs|Go-Ahead-Got-It|Go!Zilla|GoForIt.com|GOFORITBOT|gold|Golem|GoodJelly|Gordon-College-Google-Mini|goroam|GoSeebot|gotit|Govbot|GPU\ p2p|grab|Grabber|GrabNet|Grafula|grapeFX|grapeshot|GrapeshotCrawler|grbot|GreenYogi\ [ZSEBOT]|Gromit|GroupMe|grub|grub-client|Grubclient-|GrubNG|GruBot|gsa|GSLFbot|GT::WWW|Gulliver|GulperBot|GurujiBot|GVC|GVC\ BUSINESS|gvcbot.com|HappyFunBot|harvest|HarvestMan|Hatena\ Antenna|Hawler|Hazel's\ Ferret\ hopper|hcat|hclsreport-crawler|HD\ nutch\ agent|Header_Test_Client|healia|Helix|heritrix|hijbul-heritrix-crawler|HiScan|HiSoftware\ AccMonitor|HiSoftware\ AccVerify|hitcrawler_|hivaBot|hloader|HMSEbot|HMView|hoge|holmes|HomePageSearch|Hooblybot-Image|HooWWWer|Hostcrawler|HSFT\ -\ Link|HSFT\ -\ LVU|HSlide|ht:|htdig|Html\ Link\ Validator|HTMLParser|HTTP::Lite|httplib|HTTrack|Huaweisymantecspider|hul-wax|humanlinks|HyperEstraier|Hyperix).*$" />
<add input="{HTTP_USER_AGENT}" pattern="^.*(ia_archiver|IAArchiver-|ibuena|iCab|ICDS-Ingestion|ichiro|iCopyright\ Conductor|id-search|IDBot|IEAutoDiscovery|IECheck|iHWebChecker|IIITBOT|iim_405|IlseBot|IlTrovatore|Iltrovatore-Setaccio|ImageBot|imagefortress|ImagesHereImagesThereImagesEverywhere|ImageVisu|imds_monitor|imo-google-robot-intelink|IncyWincy|Industry\ Cortexcrawler|Indy\ Library|indylabs_marius|InelaBot|Inet32\ Ctrl|inetbot|InfoLink|INFOMINE|infomine.ucr.edu|InfoNaviRobot|Informant|Infoseek|InfoTekies|InfoUSABot|INGRID|Inktomi|InsightsCollector|InsightsWorksBot|InspireBot|InsumaScout|Intelix|InterGET|Internet\ Ninja|InternetLinkAgent|Interseek|IOI|ip-web-crawler.com|Ipselonbot|Iria|IRLbot|Iron33|Isara|iSearch|iSiloX|IsraeliSearch|IstellaBot|its-learning|IU_CSCI_B659_class_crawler|iVia|iVia\ Page\ Fetcher|JadynAve|JadynAveBot|jakarta|Jakarta\ Commons-HttpClient|Java|Jbot|JemmaTheTourist|JennyBot|Jetbot|JetBrains\ Omea\ Pro|JetCar|Jim|JoBo|JobSpider_BA|JOC|JoeDog|JoyScapeBot|JSpyda|JubiiRobot|jumpstation|Junut|JustView|Jyxobot|K.S.Bot|KakcleBot|kalooga|KaloogaBot|kanagawa|KATATUDO-Spider|Katipo|kbeta1|Kenjin.Spider|KeywenBot|Keyword.Density|Keyword\ Density|kinjabot|KIT-Fireball|Kitenga-crawler-bot|KiwiStatus|kmbot-|kmccrew|Knight|KnowItAll|Knowledge.com|Knowledge\ Engine|KoepaBot|Koninklijke|KrOWLer|KSbot|kuloko-bot|kulturarw3|KummHttp|Kurzor|Kyluka|L.webis|LabelGrab|Labhoo|labourunions411|lachesis|Lament|LamerExterminator|LapozzBot|larbin|LARBIN-EXPERIMENTAL|LBot|LeapTag|LeechFTP|LeechGet|LetsCrawl.com|LexiBot|LexxeBot|lftp|libcrawl|libiViaCore|libWeb|libwww|libwww-perl|likse|Linguee|Link|link_checker|LinkAlarm|linkbot|LinkCheck\ by\ Siteimprove.com|LinkChecker|linkdex.com|LinkextractorPro|LinkLint|linklooker|Linkman|LinkScan|LinksCrawler|LinksManager.com_bot|LinkSweeper|linkwalker|LiteFinder|LitlrBot|Little\ Grabber\ at\ Skanktale.com|Livelapbot|LM\ Harvester|LMQueueBot|LNSpiderguy|LoadTimeBot|LocalcomBot|locust|LolongBot|LookBot|Lsearch|lssbot|LWP|lwp-request|lwp-trivial|LWP::Simple|Lycos_Spider|Lydia\ Entity|LynnBot|Lytranslate|Mag-Net|Magnet|magpie-crawler|Magus|Mail.Ru|Mail.Ru_Bot|MAINSEEK_BOT|Mammoth|MarkWatch|MaSagool|masidani_bot_|Mass|Mata.Hari|Mata\ Hari|matentzn\ at\ cs\ dot\ man\ dot\ ac\ dot\ uk|maxamine.com--robot|maxamine.com-robot|maxomobot|Maxthon$|McBot|MediaFox|medrabbit|Megite|MemacBot|Memo|MendeleyBot|Mercator-|mercuryboard_user_agent_sql_injection.nasl|MerzScope|metacarta|Metager2|metager2-verification-bot|MetaGloss|METAGOPHER|metal|metaquerier.cs.uiuc.edu|METASpider|Metaspinner|MetaURI|MetaURI\ API|MFC_Tear_Sample|MFcrawler|MFHttpScan|Microsoft.URL|MIIxpc|miner|mini-robot|minibot|miniRank|Mirror|Missigua\ Locator|Mister.PiX|Mister\ PiX|Miva|MJ12bot|mnoGoSearch|mod_accessibility|moduna.com|moget|MojeekBot|MOMspider|MonkeyCrawl|MOSES|Motor|mowserbot|MQbot|MSE360|MSFrontPage|MSIECrawler|MSIndianWebcrawl|MSMOBOT|Msnbot|msnbot-products|MSNPTC|MSRBOT|MT-Soft|MultiText|My_Little_SearchEngine_Project|my-heritrix-crawler|MyApp|MYCOMPANYBOT|mycrawler|MyEngines-US-Bot|MyFamilyBot|Myra|nabot|nabot_|Najdi.si|Nambu|NAMEPROTECT|NatchCVS|naver|naverbookmarkcrawler|NaverBot|Navroad|NearSite|NEC-MeshExplorer|NeoScioCrawler|NerdByNature.Bot|NerdyBot|Nerima-crawl-).*$" />
<add input="{HTTP_USER_AGENT}" pattern="^.*(T-H-U-N-D-E-R-S-T-O-N-E|Tailrank|tAkeOut|TAMU_CRAWLER|TapuzBot|Tarantula|targetblaster.com|TargetYourNews.com|TAUSDataBot|taxinomiabot|Tecomi|TeezirBot|Teleport|Teleport\ Pro|TeleportPro|Telesoft|Teradex\ Mapper|TERAGRAM_CRAWLER|TerrawizBot|testbot|testing\ of|TextBot|thatrobotsite.com|The.Intraformant|The\ Dyslexalizer|The\ Intraformant|TheNomad|Theophrastus|theusefulbot|TheUsefulbot_|ThumbBot|thumbshots-de-bot|tigerbot|TightTwatBot|TinEye|Titan|to-dress_ru_bot_|to-night-Bot|toCrawl|Topicalizer|topicblogs|Toplistbot|TopServer\ PHP|topyx-crawler|Touche|TourlentaScanner|TPSystem|TRAAZI|TranSGeniKBot|travel-search|TravelBot|TravelLazerBot|Treezy|TREX|TridentSpider|Trovator|True_Robot|tScholarsBot|TsWebBot|TulipChain|turingos|turnit|TurnitinBot|TutorGigBot|TweetedTimes|TweetmemeBot|TwengaBot|TwengaBot-Discover|Twiceler|Twikle|twinuffbot|Twisted\ PageGetter|Twitturls|Twitturly|TygoBot|TygoProwler|Typhoeus|U.S.\ Government\ Printing\ Office|uberbot|ucb-nutch|UCSD-Crawler|UdmSearch|UFAM-crawler-|Ultraseek|UnChaos|unchaos_crawler_|UnisterBot|UniversalSearch|UnwindFetchor|UofTDB_experiment|updated|URI::Fetch|url_gather|URL-Checker|URL\ Control|URLAppendBot|URLBlaze|urlchecker|urlck|UrlDispatcher|urllib|URLSpiderPro|URLy.Warning|USAF\ AFKN\|usasearch|USS-Cosmix|USyd-NLP-Spider|Vacobot|Vacuum|VadixBot|Vagabondo|Validator|Valkyrie|vBSEO|VCI|VerbstarBot|VeriCiteCrawler|Verifactrola|Verity-URL-Gateway|vermut|versus|versus.integis.ch|viasarchivinginformation.html|vikspider|VIP|VIPr|virus-detector|VisBot|Vishal\ For\ CLIA|VisWeb|vlad|vlsearch|VMBot|VocusBot|VoidEYE|VoilaBot|Vortex|voyager|voyager-hc|voyager-partner-deep|VSE|vspider).*$" />
<add input="{HTTP_USER_AGENT}" pattern="^.*(W3C_Unicorn|W3C-WebCon|w3m|w3search|wacbot|wastrix|Water\ Conserve|Water\ Conserve\ Portal|WatzBot|wauuu\ engine|Wavefire|Waypath|Wazzup|Wazzup1.0.4800|wbdbot|web-agent|Web-Sniffer|Web.Image.Collector|Web\ CEO\ Online|Web\ Image\ Collector|Web\ Link\ Validator|Web\ Magnet|webalta|WebaltBot|WebAuto|webbandit|webbot|webbul-bot|WebCapture|webcheck|Webclipping.com|webcollage|WebCopier|WebCopy|WebCorp|webcrawl.net|webcrawler|WebDownloader\ for|Webdup|WebEMailExtrac|WebEMailExtrac.*|WebEnhancer|WebFerret|webfetch|WebFetcher|WebGather|WebGo\ IS|webGobbler|WebImages|Webinator-search2.fasthealth.com|Webinator-WBI|WebIndex|WebIndexer|weblayers|WebLeacher|WeblexBot|WebLinker|webLyzard|WebmasterCoffee|WebmasterWorld|WebmasterWorldForumBot|WebMiner|WebMoose|WeBot|WebPix|WebReaper|WebRipper|WebSauger|Webscan|websearchbench|WebSite|websitemirror|WebSpear|websphinx.test|WebSpider|Webster|Webster.Pro|Webster\ Pro|WebStripper|WebTrafficExpress|WebTrends\ Link\ Analyzer|webvac|webwalk|WebWalker|Webwasher|WebWatch|WebWhacker|WebXM|WebZip|Weddings.info|wenbin|WEPA|WeRelateBot|Whacker|Widow|WikiaBot|Wikio|wikiwix-bot-|WinHttp.WinHttpRequest|WinHTTP\ Example|WIRE|wired-digital-newsbot|WISEbot|WISENutbot|wish-la|wish-project|wisponbot|WMCAI-robot|wminer|WMSBot|woriobot|worldshop|WorQmada|Wotbox|WPScan|wume_crawler|WWW-Mechanize|www.freeloader.com.|WWW\ Collector|WWWOFFLE|wwwrobot|wwwster|WWWWanderer|wwwxref|Wysigot|X-clawler|Xaldon|Xenu|Xenu's|Xerka\ MetaBot|XGET|xirq|XmarksFetch|XoviBot|xqrobot|Y!J|Y!TunnelPro|yacy.net|yacybot|yarienavoir.net|Yasaklibot|yBot|YebolBot|yellowJacket|yes|YesupBot|Yeti|YioopBot|YisouSpider|yolinkBot|yoogliFetchAgent|yoono|Yoriwa|YottaCars_Bot|you-dir|Z-Add\ Link|zagrebin|Zao|zedzo.digest|zedzo.validate|zermelo|Zeus|Zeus\ Link\ Scout|zibber-v|zimeno|Zing-BottaBot|ZipppBot|zmeu|ZoomSpider|ZuiBot|ZumBot|Zyborg|Zyte).*$" />
<add input="{HTTP_USER_AGENT}" pattern="^.*(Nessus|NESSUS::SOAP|nestReader|Net::Trackback|NetAnts|NetCarta\ CyberPilot\ Pro|Netcraft|NetID.com|NetMechanic|Netprospector|NetResearchServer|NetScoop|NetSeer|NetShift=|NetSongBot|Netsparker|NetSpider|NetSrcherP|NetZip|NetZip-Downloader|NewMedhunt|news|News_Search_App|NewsGatherer|Newsgroupreporter|NewsTroveBot|NextGenSearchBot|nextthing.org|NHSEWalker|nicebot|NICErsPRO|niki-bot|NimbleCrawler|nimbus-1|ninetowns|Ninja|NjuiceBot|NLese|Nogate|Nomad-V2.x|NoteworthyBot|NPbot|NPBot-|NRCan\ intranet|NSDL_Search_Bot|nu_tch-princeton|nuggetize.com|nutch|nutch1|NutchCVS|NutchOrg|NWSpider|Nymesis|nys-crawler|ObjectsSearch|oBot|Obvius\ external\ linkcheck|Occam|Ocelli|Octopus|ODP\ entries|Offline.Explorer|Offline\ Explorer|Offline\ Navigator|OGspider|OmiExplorer_Bot|OmniExplorer_Bot|omnifind|OmniWeb|OnetSzukaj|online\ link\ validator|OOZBOT|Openbot|Openfind|Openfind\ data|OpenHoseBot|OpenIntelligenceData|OpenISearch|OpenSearchServer_Bot|OpiDig|optidiscover|OrangeBot|ORISBot|ornl_crawler_1|ORNL_Mercury|osis-project.jp|OsO|OutfoxBot|OutfoxMelonBot|OWLER-BOT|owsBot|ozelot|P3P\ Client|page_verifier|PageBitesHyperBot|Pagebull|PageDown|PageFetcher|PageGrabber|PagePeeker|PageRank\ Monitor|pamsnbot.htm|Panopy|panscient.com|Pansophica|Papa\ Foto|PaperLiBot|parasite|parsijoo|Pathtraq|Pattern|Patwebbot|pavuk|PaxleFramework|PBBOT|pcBrowser|pd-crawler|PECL::HTTP|penthesila|PeoplePal|perform_crawl|PerMan|PGP-KA|PHPCrawl|PhpDig|PicoSearch|pipBot|pipeLiner|Pita|pixfinder|PiyushBot|planetwork|PleaseCrawl|Plucker|Plukkie|Plumtree|Pockey|Pockey-GetHTML|PoCoHTTP|pogodak.ba|Pogodak.co.yu|Poirot|polybot|Pompos|Poodle\ predictor|PopScreenBot|PostPost|PrivacyFinder|ProjectWF-java-test-crawler|ProPowerBot|ProWebWalker|PROXY|psbot|psbot-page|PSS-Bot|psycheclone|pub-crawler|pucl|pulseBot\ \(pulse|Pump|purebot|PWeBot|pycurl|Python-urllib|pythonic-crawler|PythonWikipediaBot|q1|QEAVis\ agent|QFKBot|qualidade|Qualidator.com|QuepasaCreep|QueryN.Metasearch|QueryN\ Metasearch|quest.durato|Quintura-Crw|QunarBot|Qweery_robot.txt_CheckBot|QweeryBot|r2iBot|R6_CommentReader|R6_FeedFetcher|R6_VoteReader|RaBot|Radian6|radian6_linkcheck|RAMPyBot|RankurBot|RcStartBot|RealDownload|Reaper|REBI-shoveler|Recorder|RedBot|RedCarpet|ReGet|RepoMonkey|RepoMonkey\ Bait|Riddler|RIIGHTBOT|RiseNetBot|RiverglassScanner|RMA|RoboPal|Robosourcer|robot|robotek|robots|Robozilla|rogerBot|Rome\ Client|Rondello|Rotondo|Roverbot|RPT-HTTPClient|rtgibot|RufusBot|Runnk\ online\ rss\ reader|s~stremor-crawler|S2Bot|SafariBookmarkChecker|SaladSpoon|Sapienti|SBIder|SBL-BOT|SCFCrawler|Scich|ScientificCommons.org|ScollSpider|ScooperBot|Scooter|ScoutJet|ScrapeBox|Scrapy|SCrawlTest|Scrubby|scSpider|Scumbot|SeaMonkey$|Search-Channel|Search-Engine-Studio|search.KumKie.com|search.msn.com|search.updated.com|search.usgs.gov|Search\ Publisher|Searcharoo.NET|SearchBlox|searchbot|searchengine|searchhippo.com|SearchIt-Bot|searchmarking|searchmarks|searchmee_v|SearchmetricsBot|searchmining|SearchnowBot_v1|searchpreview|SearchSpider.com|SearQuBot|Seekbot|Seeker.lookseek.com|SeeqBot|seeqpod-vertical-crawler|Selflinkchecker|Semager|semanticdiscovery|Semantifire1|semisearch|SemrushBot|Senrigan|SEOENGWorldBot|SeznamBot|ShablastBot|ShadowWebAnalyzer|Shareaza|Shelob|sherlock|ShopWiki|ShowLinks|ShowyouBot|siclab|silk|Siphon|SiteArchive|SiteCheck-sitecrawl|sitecheck.internetseer.com|SiteFinder|SiteGuardBot|SiteOrbiter|SiteSnagger|SiteSucker|SiteSweeper|SiteXpert|SkimBot|SkimWordsBot|SkreemRBot|skygrid|Skywalker|Sleipnir|slow-crawler|SlySearch|smart-crawler|SmartDownload|Smarte|smartwit.com|Snake|Snapbot|SnapPreviewBot|Snappy|snookit|Snooper|Snoopy|SocialSearcher|SocSciBot|SOFT411\ Directory|sogou|sohu-search|sohu\ agent|Sokitomi|Solbot|sootle|Sosospider|Space\ Bison|Space\ Fung|SpaceBison|SpankBot|spanner|Spatineo\ Monitor\ Controller|special_archiver|SpeedySpider|Sphider|Sphider2|spider|Spider.TerraNautic.net|SpiderEngine|SpiderKU|SpiderMan|Spinn3r|Spinne|sportcrew-Bot|spyder3.microsys.com|sqlmap|Squid-Prefetch|SquidClamAV_Redirector|Sqworm|SrevBot|sslbot|SSM\ Agent|StackRambler|StarDownloader|statbot|statcrawler|statedept-crawler|Steeler|STEGMANN-Bot|stero|Stripper|Stumbler|suchclip|sucker|SumeetBot|SumitBot|SummizeBot|SummizeFeedReader|SuperBot|superbot.com|SuperHTTP|SuperLumin|SuperPagesBot|Supybot|SURF|Surfbot|SurfControl|SurveyBot|suzuran|SWEBot|swish-e|SygolBot|SynapticWalker|Syntryx\ ANT\ Scout\ Chassis\ Pheromone|SystemSearch-robot|Szukacz).*$" />
</conditions>
<action type="CustomResponse" statusCode="403" statusReason="Forbidden" statusDescription="Forbidden" />
</rule>
Your pattern MJ12bot|spbot|YandexBot is a regex pattern but the pattern syntax is configured as Wildcard, so no matches found.
Remove the attribute patternSyntax="Wildcard" from your configuration and replace <match url="*" /> with <match url=".*" /> then it will work as expected.
<rule name="RequestBlockingRule1" stopProcessing="true">
<match url=".*" />
<conditions logicalGrouping="MatchAny">
<add input="{HTTP_USER_AGENT}" pattern="MJ12bot|spbot|YandexBot" />
</conditions>
<action type="CustomResponse" statusCode="403" statusReason="Forbidden: Access is denied." statusDescription="You do not have permission to view this directory or page using the credentials that you supplied." />
</rule>
Related
I am doing the web.config transformations. Here is my web.config -
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<location path="." inheritInChildApplications="false">
<system.webServer>
<httpProtocol>
<customHeaders>
<add name="X-Content-Type-Options" value="nosniff" />
<add name="Content-Security-Policy-Report-Only" value="frame-ancestors 'self'" />
</customHeaders>
</httpProtocol>
<handlers>
<add name="aspNetCore" path="*" verb="*" modules="AspNetCoreModuleV2" resourceType="Unspecified" />
</handlers>
<rewrite>
<rules>
<rule name="https redirect">
<match url="(.*)" ignoreCase="false" />
<conditions>
<add input="{HTTPS}" pattern="off" ignoreCase="false" />
</conditions>
<action type="Redirect" redirectType="Found" url="https://{HTTP_HOST}{REQUEST_URI}" />
</rule>
</rules>
<outboundRules>
<rule name="Remove RESPONSE_Server" >
<match serverVariable="RESPONSE_Server" pattern=".+" />
<action type="Rewrite" value="" />
</rule>
<rule name="Adding HSTS" enabled="true">
<match serverVariable="RESPONSE_Strict_Transport_Security"
pattern=".*" />
<conditions>
<add input="{HTTPS}" pattern="on" ignoreCase="true" />
</conditions>
<action type="Rewrite" value="max-age=31536000" />
</rule>
</outboundRules>
</rewrite>
<aspNetCore processPath="dotnet" arguments=".\App.dll" stdoutLogEnabled="false" stdoutLogFile=".\logs\stdout" hostingModel="inprocess">
<environmentVariables>
<environmentVariable name="ASPNETCORE_ENVIRONMENT" value="Local" />
</environmentVariables>
</aspNetCore>
</system.webServer>
<system.web>
<httpCookies httpOnlyCookies="true" requireSSL="true" />
</system.web>
</location>
</configuration>
My web.release.config is same as web.config. And web.Development.config is like below.
<?xml version="1.0" encoding="utf-8"?>
<configuration xmlns:xdt="http://schemas.microsoft.com/XML-Document-Transform">
<system.webServer>
<aspNetCore processPath="dotnet" arguments=".\App.dll" stdoutLogEnabled="false" stdoutLogFile=".\logs\stdout">
<environmentVariables>
<environmentVariable name="ASPNETCORE_ENVIRONMENT" value="Development" xdt:Transform="Replace" xdt:Locator="Match(name)"/>
</environmentVariables>
</aspNetCore>
</system.webServer>
</configuration>
But when I am publishing using "dotnet publish --configuration Development /p:EnvironmentName=Development" command, it says No element in the source document matches '/configuration/system.webServer'
Also while deploying to server I am getiing this -
[apply APPName_DEV\web.Development.config to APPName_DEV\web.config] W:\Buildnumber\APPName_DEV\web.Development.config:7,7 - No element in the source document matches '/configuration/system.webServer'
Please help me on this, thanks in advance :)
I think u need to remove <location> from your base config or add <location> to you development config.
u trying to replace <configuration><system.webServer>
but in your base config u have <configuration><location><system.webServer>
Need your help.
There is one IIS server in Azure.
It contains the Website and the Angular SPA application.
I need to install in a separate directory WordPress blog.
Blog unfolded.
But there was a problem with Choosing your permalink structure.
I have an error:
403 - Forbidden: Access is denied.
Credentials that you are supplied with.
I have two web.config files
1. ROOT web.config
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<location path=".">
<system.webServer>
<security>
<authentication>
<anonymousAuthentication enabled="true" />
</authentication>
</security>
<rewrite>
<rules>
<clear />
<rule name="Redirect http to https" stopProcessing="true">
<match url=".*" />
<conditions logicalGrouping="MatchAll" trackAllCaptures="false">
<add input="{HTTPS}" pattern="^OFF$" />
</conditions>
<action type="Redirect" url="https://{HTTP_HOST}/{R:0}" />
</rule>
<rule name="login redirect" patternSyntax="ECMAScript" stopProcessing="true">
<match url="^login" />
<conditions logicalGrouping="MatchAll" trackAllCaptures="false" />
<action type="Redirect" url="ANGULAR-SPA-DIR/login" />
</rule>
<rule name="AngularJS Routes" stopProcessing="true">
<match url=".*" />
<conditions logicalGrouping="MatchAll" trackAllCaptures="false">
<add input="{REQUEST_FILENAME}" matchType="IsFile" negate="true" />
<add input="{REQUEST_FILENAME}" matchType="IsDirectory" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="/ROOT-DIR/" ignoreCase="true" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="/ANGULAR-SPA-DIR/" ignoreCase="true" negate="true" />
<add input="{REQUEST_URI}" matchType="Pattern" pattern="/TEST/" ignoreCase="true" negate="true" />
</conditions>
<action type="Rewrite" url="/index.html" />
</rule>
<rule name="AngularJS Routes1" stopProcessing="true">
<match url="^ANGULAR-SPA-DIR/" />
<conditions logicalGrouping="MatchAll" trackAllCaptures="false">
<add input="{REQUEST_FILENAME}" matchType="IsFile" negate="true" />
<add input="{REQUEST_FILENAME}" matchType="IsDirectory" negate="true" />
</conditions>
<action type="Rewrite" url="/ANGULAR-SPA-DIR/index.html" />
</rule>
<rule name="AngularJS Routes2" stopProcessing="true">
<match url="^TEST/" />
<conditions logicalGrouping="MatchAll" trackAllCaptures="false">
<add input="{REQUEST_FILENAME}" matchType="IsFile" negate="true" />
<add input="{REQUEST_FILENAME}" matchType="IsDirectory" negate="true" />
</conditions>
<action type="Rewrite" url="/TEST/index.html" />
</rule>
<rule name="WordPress Blog" stopProcessing="true">
<match url="blog/.*" />
<conditions logicalGrouping="MatchAll" trackAllCaptures="true">
<add input="{REQUEST_FILENAME}" matchType="IsFile" />
<add input="{REQUEST_FILENAME}" matchType="IsDirectory" />
</conditions>
<action type="None" logRewrittenUrl="true" />
</rule>
</rules>
</rewrite>
<caching>
<profiles>
<add extension=".css" policy="CacheForTimePeriod" kernelCachePolicy="DontCache" duration="00:30:00" />
<add extension=".svg" policy="CacheForTimePeriod" kernelCachePolicy="DontCache" duration="01:00:00" />
<add extension=".jpg" policy="CacheForTimePeriod" kernelCachePolicy="DontCache" duration="01:00:00" />
<add extension=".js" policy="CacheForTimePeriod" kernelCachePolicy="DontCache" duration="00:30:00" />
<add extension=".woff2" policy="CacheForTimePeriod" kernelCachePolicy="DontCache" duration="24.00:00:00" />
</profiles>
</caching>
<directoryBrowse enabled="true" />
</system.webServer>
</location>
</configuration>
Blog web.config
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<system.webServer>
<rewrite>
<rules><remove name="WordPress Blog"/>
<rule name="WordPress: https://ROOT-URL/blog" patternSyntax="Wildcard">
<match url="*"/>
<conditions>
<add input="{REQUEST_FILENAME}" matchType="IsFile" negate="true"/>
<add input="{REQUEST_FILENAME}" matchType="IsDirectory" negate="true"/>
</conditions>
<action type="Rewrite" url="index.php"/>
</rule></rules>
</rewrite>
</system.webServer>
</configuration>
What can suggest experienced friends?
Add srting to ROOT web.config
<conditions logicalGrouping="MatchAll" trackAllCaptures="false">
...
<add input="{QUERY_STRING}" pattern="/blog/" />
</conditions>
After that, they "Choosing your permalink structure" work correctly
I have added multiple http bindings to my site like:
http://sub1.domain.com
http://sub2.domain.com
http://sub3.domain.com
http://sub4.domain.com
http://sub5.domain.com
I need to add different query string to those URLs when user hits any of those URLs.
http://sub1.domain.com/?qs=10
http://sub2.domain.com/?qs=15
http://sub3.domain.com/?qs=25
http://sub4.domain.com/?qs=30
http://sub5.domain.com/?qs=50
I'm thinking to keep query string values in appSettings keys. like
<appSettings>
<add key ="sub1" value="10" />
<add key ="sub2" value="15" />
...
</appSettings>
I wrote following rule that appends fixed query string. But it'll append qs=10 for all five URLs. But I'm clueless about making it dynamic.
<rule name="Add query string param" stopProcessing="true">
<match url=".*" />
<conditions>
<add input="{QUERY_STRING}" pattern="qs=10" negate="true" />
<add input="&{QUERY_STRING}" pattern="^(&.+)|^&$" />
</conditions>
<action type="Redirect" url="http://{HTTP_HOST}/{R:0}?qs=10{C:1}" appendQueryString="false" />
</rule>
You should probably look at rewrite maps. I haven't tested it, but it should be something like this:
<rewrite>
<rewriteMaps>
<rewriteMap name="SubDomainQueryStrings">
<add key="sub1.domain.com" value="qs=10" />
<add key="sub2.domain.com" value="qs=15" />
<add key="sub3.domain.com" value="qs=25" />
<add key="sub4.domain.com" value="qs=30" />
<add key="sub5.domain.com" value="qs=50" />
</rewriteMap>
</rewriteMaps>
<rules>
<rule name="Add query string param" stopProcessing="true">
<match url=".*" />
<conditions>
<add input="{QUERY_STRING}" pattern="{SubDomainQueryStrings:{HTTP_HOST}}" negate="true" />
<add input="&{QUERY_STRING}" pattern="^(&.+)|^&$" />
</conditions>
<action type="Redirect" url="{R:0}?{SubDomainQueryStrings:{HTTP_HOST}}{C:1}" appendQueryString="false"/>
</rule>
</rules>
<rewrite>
You need to use a HTTP_HOST condition to check for the subdomain like that:
<add input="{HTTP_HOST}" pattern="^sub1" />
Code for first two subdomains should look like that:
<rule name="Add query string param 10" stopProcessing="true">
<match url=".*" />
<conditions>
<add input="{HTTP_HOST}" pattern="^sub1" />
<add input="{QUERY_STRING}" pattern="qs=10" negate="true" />
<add input="&{QUERY_STRING}" pattern="^(&.+)|^&$" />
</conditions>
<action type="Redirect" url="http://{HTTP_HOST}/{R:0}?qs=10{C:1}" appendQueryString="false" />
</rule>
<rule name="Add query string param 15" stopProcessing="true">
<match url=".*" />
<conditions>
<add input="{HTTP_HOST}" pattern="^sub2" />
<add input="{QUERY_STRING}" pattern="qs=15" negate="true" />
<add input="&{QUERY_STRING}" pattern="^(&.+)|^&$" />
</conditions>
<action type="Redirect" url="http://{HTTP_HOST}/{R:0}?qs=15{C:1}" appendQueryString="false" />
</rule>
I have a URL Rewrite in place which works fine BUT when you put "/" in the end it does not work...
how do i work out those urls ... please suggest
<rewriteMap name="Survey2013">
<add key="/discount" value="/survey/store/" />
<add key="/discount/" value="/survey/store/" /> // i have to add this URL as well
<add key="/discounts" value="/survey/store/" />
<add key="/discounts/" value="/survey/store/" />
</rewriteMap>
<rule name="Redirect for Survey2013" enabled="true">
<match url=".*" />
<conditions logicalGrouping="MatchAll" trackAllCaptures="false">
<add input="{Survey2013:{REQUEST_URI}}" pattern="(.+)" />
</conditions>
<action type="Redirect" url="{C:1}" appendQueryString="true" />
</rule>
You can change the pattern in your condition to take an optional trailing / with pattern="(.+)/?".
Your rule will become:
<rule name="Redirect for Survey2013" enabled="true">
<match url=".*" />
<conditions logicalGrouping="MatchAll" trackAllCaptures="false">
<add input="{Survey2013:{REQUEST_URI}}" pattern="(.+)/?" />
</conditions>
<action type="Redirect" url="{C:1}" appendQueryString="true" />
</rule>
I have defined a URL Rewrite rule through IIS. Basically it turns something like this:
Article.aspx?ID=1&FriendlyURL=whatever
INTO
/1/whatever
Please note that Redirection is working right, but URL Rewrite (links within the page) are not being translated unless I am inside the Article.aspx page.
How can I make the Rewrite Rule apply to all the pages instead of only one? I'm posting below the written rules from Web.Config for your reference. Thanks.
<system.webServer>
<rewrite>
<outboundRules>
<rule name="OutboundRewriteUserFriendlyURL1" preCondition="ResponseIsHtml1">
<match filterByTags="A, Form, Img" pattern="^(.*/)Article\.aspx\?ID=([^=&]+)&(?:amp;)?FriendlyURL=([^=&]+)$" />
<action type="Rewrite" value="{R:1}{R:2}/{R:3}/" />
</rule>
<preConditions>
<preCondition name="ResponseIsHtml1">
<add input="{RESPONSE_CONTENT_TYPE}" pattern="^text/html" />
</preCondition>
</preConditions>
</outboundRules>
<rewriteMaps>
<rewriteMap name="Article Rewrite">
<add key="Article.aspx?ID=1&FriendlyURL=whatever" value="/1/whatever" />
</rewriteMap>
</rewriteMaps>
<rules>
<rule name="RedirectUserFriendlyURL1" stopProcessing="true">
<match url="^Article\.aspx$" />
<conditions>
<add input="{REQUEST_METHOD}" pattern="^POST$" negate="true" />
<add input="{QUERY_STRING}" pattern="^ID=([^=&]+)&FriendlyURL=([^=&]+)$" />
</conditions>
<action type="Redirect" url="{C:1}/{C:2}" appendQueryString="false" />
</rule>
<rule name="RewriteUserFriendlyURL1" stopProcessing="true">
<match url="^([^/]+)/([^/]+)/?$" />
<conditions>
<add input="{REQUEST_FILENAME}" matchType="IsFile" negate="true" />
<add input="{REQUEST_FILENAME}" matchType="IsDirectory" negate="true" />
</conditions>
<action type="Rewrite" url="Article.aspx?ID={R:1}&FriendlyURL={R:2}" />
</rule>
</rules>
</rewrite>
</system.webServer>
So I finally had to hard-code the links to be url-friendly by setting the "href" attribute within the code.
Something like this:
<a href='/1/hello-world/'>Read the "Hello World" Article</a>
Thanks.
I like regular expressions problems, try this.
<system.webServer>
<rewrite>
<outboundRules>
<clear />
<rule name="OutboundRewriteUserFriendlyURL1"
preCondition="ResponseIsHtml1">
<match filterByTags="A, Form, Img"
pattern="^(.*/)([^\.]+)\.aspx\?ID=([^=&]+)&(?:amp;)?FriendlyURL=([^=&]+)$" />
<conditions logicalGrouping="MatchAll"
trackAllCaptures="true" />
<action type="Rewrite"
value="{R:1}{R:2}/{R:3}/{R:4}/" />
</rule>
<rule name="OutboundRewriteUserFriendlyURL2"
preCondition="ResponseIsHtml1">
<match filterByTags="A, Form, Img"
pattern="^(.*)\?ID=([^=&]+)&(?:amp;)?FriendlyURL=([^=&]+)$" />
<conditions logicalGrouping="MatchAll"
trackAllCaptures="true" />
<action type="Rewrite"
value="" />
</rule>
<preConditions>
<preCondition name="ResponseIsHtml1">
<add input="{RESPONSE_CONTENT_TYPE}"
pattern="^text/html" />
</preCondition>
</preConditions>
</outboundRules>
<rewriteMaps>
<rewriteMap name="Article Rewrite">
<add key="Article.aspx?ID=1&FriendlyURL=whatever"
value="/1/whatever" />
</rewriteMap>
</rewriteMaps>
<rules>
<rule name="RedirectUserFriendlyURL1"
stopProcessing="true">
<match url="^([^\.]+)\.aspx$" />
<conditions>
<add input="{REQUEST_METHOD}"
pattern="^POST$"
negate="true" />
<add input="{QUERY_STRING}"
pattern="^ID=([^=&]+)&FriendlyURL=([^=&]+)$" />
</conditions>
<action type="Redirect"
url="{R:1}/{C:1}/{C:2}"
appendQueryString="false" />
</rule>
<rule name="RewriteUserFriendlyURL1"
stopProcessing="true">
<match url="^([^/]+)/([^/]+)/([^/]+)/?$" />
<conditions>
<add input="{REQUEST_FILENAME}"
matchType="IsFile"
negate="true" />
<add input="{REQUEST_FILENAME}"
matchType="IsDirectory"
negate="true" />
</conditions>
<action type="Rewrite"
url="{R:1}.aspx?ID={R:2}&FriendlyURL={R:3}" />
</rule>
</rules>
</rewrite>
<urlCompression doStaticCompression="false"
doDynamicCompression="false" />
</system.webServer>
The problem is in your OutboundRewrite rule's regular expression. I suggest you get a regex tool like expresso (my favorite), start with a very simple regex, then add complexity as your situation dictates.
The simplest regex that will match your example is:
Article\.aspx\?ID=(\d)&FriendlyURL=(.*)
Here's an example. Godspeed.