[Users] Downloads for offline use need to be multi-threaded for speed
Rajib Bandopadhyay
bkpsusmitaa at gmail.com
Mon Sep 8 07:23:46 CEST 2014
On 08/09/2014, Paul
<claws at thewildbeast.co.uk> wrote:
...
[snipped]
> Ahh, right. OK. It all sounds so
> simple when you put it like that. So
> we'll be seeing your patch in, say, a
> month??
>
> On the other hand, it could be that
> you're way off the mark and it's
> actually a lot more complex than you
> imagine.
...
[/snipped]
No, I am confident in your abilities as
coders. I found the following
interesting. I looked into the code,
not because someone threw me a
challenge, but to see for myself the
complexity of the issue.
Multi-threading is not as difficult as
it sounds. We need to think about how
wikis collaboratively update a single
document.
Again, please don't challenge me to
write multi-threaded code, similar to a
wiki, that updates the local folders
with each piece of input from each
separate thread. :)
In the end, I am a lazy coder, and my
background is physics, not computer
science. But I have tremendous
interest in everything under the sun.
The code that limits multi-threading in
httrack:
(1) In source file htsopt.h
float maxconn; //
nombre max de connexions/s
(2) In source file htslib.c
opt->maxconn = 5.0; //
nombre connexions/s
(3) In source file htsalias.c
const char *hts_optalias[][4] = {
...
{"sockets", "-c", "param", "number of
simultaneous connections allowed"},
{"socket", "-c", "param", "number
of simultaneous connections allowed"},
{"connection", "-c", "param",
"number of simultaneous connections
allowed"},
(4) In source file htscoremain.c
/* Enforce limits to avoid
bandwidth abuse. The bypass_limits
should only be used by administrators
and experts. */ if
(!opt->bypass_limits) { if (opt->maxsoc
<= 0 || opt->maxsoc > 8) { opt->maxsoc
= 8; hts_log_print(opt, LOG_WARNING, "*
security warning: maximum number of
simultaneous connections limited to %d
to avoid server overload", (int)
opt->maxsoc); } if (opt->maxrate <= 0
|| opt->maxrate > 250000)
{ opt->maxrate = 250000;
hts_log_print(opt, LOG_WARNING, "*
security warning: maximum bandwidth
limited to %d to avoid server
overload", (int) opt->maxrate); } if
(opt->maxconn <= 0 || opt->maxconn >
5.0) { opt->maxconn = 5.0;
hts_log_print(opt, LOG_WARNING, "*
security warning: maximum number of
connections per second limited to %f to
avoid server overload", (float)
opt->maxconn); } } else
{ hts_log_print(opt, LOG_WARNING, "*
security warning: !!! BYPASSING
SECURITY LIMITS - MONITOR THIS SESSION
WITH EXTREME CARE !!!"); }
(5) (A) In source file htscore.c
// connect limiter
if (n > 0 && opt->maxconn > 0 &&
HTS_STAT.last_connect > 0) { TStamp
opTime = HTS_STAT.last_request ?
HTS_STAT.last_request :
HTS_STAT.last_connect; TStamp cTime =
mtime_local(); TStamp lap = (cTime -
opTime); TStamp minLap = (TStamp)
(1000.0 / opt->maxconn);
if (lap < minLap) {
n = 0;
} else if (minLap != 0) {
int nMax = (int) (lap / minLap);
n = min(n, nMax);
}
}
return n;
}
and
(5)(B) if (from->maxconn > 0)
to->maxconn = from->maxconn;
(6) HTSEXT_API void qsec2str is a
function I find important, defined in
httrack-library.h and active in
conversion in htslib.c
// idem, plus court (chaine)
HTSEXT_API void qsec2str(char *st,
TStamp t) {
int j, h, m, s;
(7) There are source files such as
htsftp.c, htscore.c and htslib.c that
set the timestamp via TStamp; TStamp
plays an important role in indirectly
controlling the simultaneous connections.
(8) In source file htsback.c
// clear, or leave for keep-alive
void back_maydeletehttp(httrackp * opt,
cache_back * cache, struct_back *
sback, const int p) { lien_back *const
back = sback->lnk; const int back_max =
sback->count; TStamp lt = 0;
assertf(p >= 0 && p < back_max);
if (back[p].r.soc != INVALID_SOCKET) {
int q;
if (back[p].r.soc !=
INVALID_SOCKET /* security check */
&& back[p].r.statuscode >= 0 /*
no timeout errors & co */ &&
back[p].r.keep_alive_trailers ==
0 /* not yet supported (chunk
trailers) */ /* Socket not in I/O
error status */
&& !back[p].r.is_file
&& !check_sockerror(back[p].r.soc) /*
Keep-alive authorized by user */
&& !opt->nokeepalive /* Socket
currently is keep-alive! */ &&
back[p].r.keep_alive /* Remaining
authorized requests */ &&
back[p].r.keep_alive_max > 1 /*
Known keep-alive start (security)
*/ && back[p].ka_time_start /*
We're on time */ && (lt =
time_local()) <
back[p].ka_time_start +
back[p].r.keep_alive_t /*
Connection delay must not exceed
keep-alive timeout */ &&
(opt->maxconn <= 0 ||
(back[p].r.keep_alive_t > (1.0 /
opt->maxconn))) /* Available slot
in backing */ && (q =
back_search(opt, sback)) >= 0)
{ lien_back tmp;
strcpybuff(tmp.url_adr,
back[p].url_adr);
tmp.ka_time_start =
back[p].ka_time_start;
deletehttp(&back[q].r); //
security check
back_connxfr(&back[p].r,
&back[q].r); // transfer live
connection settings from p to q
back[q].ka_time_start =
back[p].ka_time_start; //
refresh back[p].r.soc =
INVALID_SOCKET;
strcpybuff(back[q].url_adr,
tmp.url_adr); // address
back[q].ka_time_start =
tmp.ka_time_start; back[q].status
= STATUS_ALIVE; // alive &
waiting
assertf(back[q].ka_time_start !=
0); hts_log_print(opt, LOG_DEBUG,
"(Keep-Alive): successfully
preserved #%d (%s)",
back[q].r.debugid,
back[q].url_adr); } else
{ deletehttp(&back[p].r);
back[p].r.soc =
INVALID_SOCKET; } } }
(9) In the source file htsparse.c
// autres occupations de HTTrack:
statistiques, boucle d'attente,
etc. if ((opt->makestat) ||
(opt->maketrack)) { TStamp l =
time_local();
if ((int) (l - makestat_time)
>= 60) { if (makestat_fp !=
NULL) { fspc(NULL, makestat_fp,
"info"); fprintf(makestat_fp,
"Rate= %d (/"
LLintP ") \11NewLinks= %d
(/%d)" LF, (int)
((HTS_STAT.HTS_TOTAL_RECV -
*stre->makestat_total_) / (l -
makestat_time)), (LLint)
HTS_STAT.HTS_TOTAL_RECV, (int)
opt->lien_tot -
*stre->makestat_lnk_, (int)
opt->lien_tot);
fflush(makestat_fp);
*stre->makestat_total_ =
HTS_STAT.HTS_TOTAL_RECV;
*stre->makestat_lnk_ =
heap_top_index(); } if
(stre->maketrack_fp != NULL)
{ int i;
fspc(NULL,
stre->maketrack_fp,
"info");
fprintf(stre->maketrack_fp,
LF); for(i = 0; i <
back_max; i++)
{ back_info(sback, i, 3,
stre->maketrack_fp); }
fprintf(stre->maketrack_fp,
LF);
fflush(stre->maketrack_fp);
}
makestat_time = l;
}
}
/* cancel links */
{
int i;
char *s;
while((s =
hts_cancel_file_pop(opt)) !=
NULL) { if (strnotempty(s))
{ // fichier à canceller for(i
= 0; i < back_max; i++) { if
((back[i].status > 0)) { if
(strcmp(back[i].url_sav, s) ==
0) { // ok trouvé if
(back[i].status != 1000) { #if
HTS_DEBUG_CLOSESOCK
DEBUG_W("user cancel:
deletehttp\n"); #endif if
(back[i].r.soc !=
INVALID_SOCKET)
deletehttp(&back[i].r);
back[i].r.soc = INVALID_SOCKET;
back[i].r.statuscode =
STATUSCODE_INVALID;
strcpybuff(back[i].r.msg,
"Cancelled by User");
back[i].status = 0; // terminé
back_set_finished(sback, i); }
else // cancel ftp..
flag à 1 back[i].stop_ftp =
1; } } } s[0] = '\0'; }
freet(s); }
// Transfer rate
engine_stats();
// Refresh various stats
HTS_STAT.stat_nsocket =
back_nsoc(sback);
HTS_STAT.stat_errors =
fspc(opt, NULL, "error");
HTS_STAT.stat_warnings =
fspc(opt, NULL, "warning");
HTS_STAT.stat_infos = fspc(opt,
NULL, "info"); HTS_STAT.nbk =
backlinks_done(sback,
opt->liens, opt->lien_tot,
ptr); HTS_STAT.nb =
back_transferred(HTS_STAT.stat_bytes,
sback);
if (!RUN_CALLBACK7
(opt, loop, sback->lnk,
sback->count, b, ptr,
opt->lien_tot, (int)
(time_local() -
HTS_STAT.stat_timestart),
&HTS_STAT))
{ hts_log_print(opt, LOG_ERROR,
"Exit requested by shell or
user"); *stre->exit_xh_ =
1; // exit requested
XH_uninit; return 0; }
}
(10) In the source files htsweb.c and
httrack.c
static TStamp prev_mytime =
0; /* ok */ static t_InpInfo
SInfo; /* ok */
//
TStamp mytime;
long int rate = 0;
//
(11) In the file htscore.c, TStamp
controls the connection limit.
(12) In httrack.h, we have:
TStamp stat_timestart;
(13) In httrack-library.h, there is
HTSEXT_API TStamp mtime_local(void);
(14) In htslib.h
HTSEXT_API void qsec2str(char *st,
TStamp t);
There are references to TStamp in
htsparse.h, htsweb.h, htscore.h,
htsback.h, htsopt.h, htsglobal.h,
...
Maybe, next time? I am getting tired.
More information about the Users
mailing list