我正在尝试编写一个脚本来监视某个网页的变化,并在检测到变化时通过电子邮件通知我。
代码如下:
#!/bin/bash
# monitor.sh - Monitors a web page for changes and
# sends an email notification if the page changes.
# Uses curl, diff and mail.
URL="http://www.pagetomonitor.com"
RECIPIENT="you@example.com"   # placeholder: set this to the address that should be notified

while true; do
    # Rotate the last download so it becomes the baseline; on the very first
    # pass there is no new.html yet, hence the suppressed error.
    mv new.html old.html 2> /dev/null
    curl -L --compressed -s "$URL" > new.html

    # Only compare once a baseline exists.
    if [ -f old.html ]; then
        # new.html is the first operand, so "<" lines in the report come from
        # the fresh download and ">" lines from the previous one.
        DIFF_OUTPUT="$(diff new.html old.html)"
        if [ -n "$DIFF_OUTPUT" ]; then
            # Command substitution drops the trailing newline, so add one
            # before the link instead of gluing it to the last diff line.
            printf '%s\nVisit %s\n' "$DIFF_OUTPUT" "$URL" |
                mail -s "Web Page Changed" "$RECIPIENT"
            sleep 10   # extra pause after a notification (20 s in total)
        fi
    fi
    sleep 10
done
这段代码基本运行正常,只是我监视的网站包含会话 ID 和服务器时间戳,它们在每次 curl 下载页面时都会改变,因此 diff 每次都会认为页面发生了变化。不出所料,运行脚本后我每 10 秒就会收到一封如下的电子邮件:
123c123
< <div class="entry-content" data-refresh-
id="1491986690.2021">
---
> <div class="entry-content" data-refresh-
id="1491986684.18823">
169c169
< SEE.options = { ajaxRequestInterval: 5000, tooltipFadeOut: 2500,
serverTime: 1491990290202, debug: false, locationCookieName: 'see-loc' };
---
> SEE.options = { ajaxRequestInterval: 5000, tooltipFadeOut: 2500,
serverTime: 1491990284188, debug: false, locationCookieName: 'see-loc' };
所以……我的问题是:有没有办法忽略这些数字或特定的行,让我只在内容真正发生变化时才收到通知?另外,如果有人想吐槽我的代码,请尽管说。
谢谢。
——克里斯
更新:
我已经用 ilkkachu 的代码成功过滤掉了会话 ID 和服务器时间戳造成的差异。
但我仍然每隔几分钟就会收到下面这样的电子邮件,而且我找不出它是怎么产生的:
12c12
< <meta charset="UTF-8" /><script type="text/javascript">window.NREUM||(NREUM={});NREUM.info = {"beacon":"bam.nr-data.net","errorBeacon":"bam.nr-data.net","licenseKey":"abab2c60d3","applicationID":"2043048","transactionName":"YgYHY0ZQWBZQVERbV1tMKGF3HnUKX0NVXEx2DAtDRl5aCVRFH3tWUQYd","queueTime":0,"applicationTime":16,"ttGuid":"E9414B3E5CD35372","agent":""}</script><script type="text/javascript">window.NREUM||(NREUM={}),__nr_require=function(e,n,t){function r(t){if(!n[t]){var o=n[t]={exports:{}};e[t][0].call(o.exports,function(n){var o=e[t][1][n];return r(o||n)},o,o.exports)}return n[t].exports}if("function"==typeof __nr_require)return __nr_require;for(var o=0;o<t.length;o++)r(t[o]);return r}({1:[function(e,n,t){function r(){}function o(e,n,t){return function(){return i(e,[c.now()].concat(u(arguments)),n?null:this,t),n?void 0:this}}var i=e("handle"),a=e(2),u=e(3),f=e("ee").get("tracer"),c=e("loader"),s=NREUM;"undefined"==typeof window.newrelic&&(newrelic=s);var p=["setPageViewName","setCustomAttribute","setErrorHandler","finished","addToTrace","inlineHit","addRelease"],d="api-",l=d+"ixn-";a(p,function(e,n){s[n]=o(d+n,!0,"api")}),s.addPageAction=o(d+"addPageAction",!0),s.setCurrentRouteName=o(d+"routeName",!0),n.exports=newrelic,s.interaction=function(){return(new r).get()};var m=r.prototype={createTracer:function(e,n){var t={},r=this,o="function"==typeof n;return i(l+"tracer",[c.now(),e,t],r),function(){if(f.emit((o?"":"no-")+"fn-start",[c.now(),r,o],t),o)try{return n.apply(this,arguments)}finally{f.emit("fn-end",[c.now()],t)}}}};a("setName,setAttribute,save,ignore,onEnd,getContext,end,get".split(","),function(e,n){m[n]=o(l+n)}),newrelic.noticeError=function(e){"string"==typeof e&&(e=new Error(e)),i("err",[e,c.now()])}},{}],2:[function(e,n,t){function r(e,n){var t=[],r="",i=0;for(r in e)o.call(e,r)&&(t[i]=n(r,e[r]),i+=1);return t}var o=Object.prototype.hasOwnProperty;n.exports=r},{}],3:[function(e,n,t){function r(e,n,t){n||(n=0),"undefined"==typeof 
t&&(t=e?e.length:0);for(var r=-1,o=t-n||0,i=Array(o<0?0:o);++r<o;)i[r]=e[n+r];return i}n.exports=r},{}],4:[function(e,n,t){n.exports={exists:"undefined"!=typeof window.performance&&window.performance.timing&&"undefined"!=typeof window.performance.timing.navigationStart}},{}],ee:[function(e,n,t){function r(){}function o(e){function n(e){return e&&e instanceof r?e:e?f(e,u,i):i()}function t(t,r,o,i){if(!d.aborted||i){e&&e(t,r,o);for(var a=n(o),u=m(t),f=u.length,c=0;c<f;c++)u[c].apply(a,r);var p=s[y[t]];return p&&p.push([b,t,r,a]),a}}function l(e,n){v[e]=m(e).concat(n)}function m(e){return v[e]||[]}function w(e){return p[e]=p[e]||o(t)}function g(e,n){c(e,function(e,t){n=n||"feature",y[t]=n,n in s||(s[n]=[])})}var v={},y={},b={on:l,emit:t,get:w,listeners:m,context:n,buffer:g,abort:a,aborted:!1};return b}function i(){return new r}function a(){(s.api||s.feature)&&(d.aborted=!0,s=d.backlog={})}var u="nr@context",f=e("gos"),c=e(2),s={},p={},d=n.exports=o();d.backlog=s},{}],gos:[function(e,n,t){function r(e,n,t){if(o.call(e,n))return e[n];var r=t();if(Object.defineProperty&&Object.keys)try{return Object.defineProperty(e,n,{value:r,writable:!0,enumerable:!1}),r}catch(i){}return e[n]=r,r}var o=Object.prototype.hasOwnProperty;n.exports=r},{}],handle:[function(e,n,t){function r(e,n,t,r){o.buffer([e],r),o.emit(e,n,t)}var o=e("ee").get("handle");n.exports=r,r.ee=o},{}],id:[function(e,n,t){function r(e){var n=typeof e;return!e||"object"!==n&&"function"!==n?-1:e===window?0:a(e,i,function(){return o++})}var o=1,i="nr@id",a=e("gos");n.exports=r},{}],loader:[function(e,n,t){function r(){if(!x++){var e=h.info=NREUM.info,n=d.getElementsByTagName("script")[0];if(setTimeout(s.abort,3e4),!(e&&e.licenseKey&&e.applicationID&&n))return s.abort();c(y,function(n,t){e[n]||(e[n]=t)}),f("mark",["onload",a()+h.offset],null,"api");var t=d.createElement("script");t.src="https://"+e.agent,n.parentNode.insertBefore(t,n)}}function o(){"complete"===d.readyState&&i()}function 
i(){f("mark",["domContent",a()+h.offset],null,"api")}function a(){return E.exists&&performance.now?Math.round(performance.now()):(u=Math.max((new Date).getTime(),u))-h.offset}var u=(new Date).getTime(),f=e("handle"),c=e(2),s=e("ee"),p=window,d=p.document,l="addEventListener",m="attachEvent",w=p.XMLHttpRequest,g=w&&w.prototype;NREUM.o={ST:setTimeout,CT:clearTimeout,XHR:w,REQ:p.Request,EV:p.Event,PR:p.Promise,MO:p.MutationObserver};var v=""+location,y={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net",agent:"js-agent.newrelic.com/nr-1026.min.js"},b=w&&g&&g[l]&&!/CriOS/.test(navigator.userAgent),h=n.exports={offset:u,now:a,origin:v,features:{},xhrWrappable:b};e(1),d[l]?(d[l]("DOMContentLoaded",i,!1),p[l]("load",r,!1)):(d[m]("onreadystatechange",o),p[m]("onload",r)),f("mark",["firstbyte",u],null,"api");var x=0,E=e(4)},{}]},{},["loader"]);</script>
---
> <meta charset="UTF-8" />
是否可以在比较时忽略第 12 行?
谢谢。
——克里斯
答案1
先用 sed 处理下载到的内容,把经常变化的部分清除掉,然后再进行比较?
# Download the page and overwrite the values that change on every request
# (data-refresh-id and serverTime) with constants before saving the snapshot,
# so that a later diff only reports real content changes.
curl -L --compressed -s "$URL" |
    sed -E -e 's/data-refresh-id="[0-9.]+"/data-refresh-id="0"/' \
        -e 's/serverTime: [0-9]+/serverTime: 0/' > new.html
答案2
可以借助 bash 的进程替换来解决:
# Print a snapshot with the volatile values removed: the quoted value of
# data-refresh-id and the number after serverTime. [^"]* stops at the closing
# quote, so the rest of the line is still compared.
strip_volatile() {
    sed 's/\(data-refresh-id\)="[^"]*"/\1/;s/\(serverTime\): [0-9]*/\1/' "$1"
}

# Compare the filtered snapshots via process substitution; the files on disk
# are left untouched.
diff <(strip_volatile old.txt) <(strip_volatile new.txt)
所以在你的脚本里可以写成:
# Capture the filtered comparison for the monitoring loop. Both snapshots are
# passed through sed first so the data-refresh-id value and the serverTime
# number never show up as differences. The file names are the ones the
# monitoring script writes (old.html / new.html).
DIFF_OUTPUT="$(diff \
    <(sed 's/\(data-refresh-id\)="[^"]*"/\1/;s/\(serverTime\): [0-9]*/\1/' old.html) \
    <(sed 's/\(data-refresh-id\)="[^"]*"/\1/;s/\(serverTime\): [0-9]*/\1/' new.html))"