[Users] Downloads for offline use need to be multi-threaded for speed

Rajib Bandopadhyay bkpsusmitaa at gmail.com
Mon Sep 8 07:23:46 CEST 2014


On 08/09/2014, Paul
<claws at thewildbeast.co.uk> wrote:

...

[snipped]

> Ahh, right. OK. It all sounds so
> simple when you put it like that. So
> we'll be seeing your patch in, say, a
> month??
>
> On the other hand, it could be that
> your way off the mark and it's
> actually a lot more complex than you
> imagine.
...
[/snipped]

No, I am confident in your abilities as
coders. I found the following
interesting. I looked into the code,
not because someone threw me a
challenge, but to see for myself the
complexity of the issue.
Multi-threading is not as difficult as
it sounds. We need to think about how
wikis collaboratively update a single
document.

Again, please don't challenge me to
write multi-threading code, similar to
a wiki's, that updates the local folders
with each piece of input from each
separate thread. :)

In the end, I am a lazy coder, and my
background is physics, not computer
science. But I have tremendous
interest in everything under the sun.

The code that limits multi-threading in
httrack:
(1) In source file htsopt.h

  float maxconn;                //
  nombre max de connexions/s


(2) In source file htslib.c
  opt->maxconn = 5.0;           //
nombre connexions/s

(3) In source file htsalias.c
const char *hts_optalias[][4] = {
...
{"sockets", "-c", "param", "number of
simultaneous connections allowed"},
    {"socket", "-c", "param", "number
of simultaneous connections allowed"},
    {"connection", "-c", "param",
    "number of simultaneous connections
    allowed"},

(4) In source file htscoremain.c
    /* Enforce limits to avoid
bandwidth abuse. The bypass_limits
should only be used by administrators
and experts. */ if
(!opt->bypass_limits) { if (opt->maxsoc
<= 0 || opt->maxsoc > 8) { opt->maxsoc
= 8; hts_log_print(opt, LOG_WARNING, "*
security warning: maximum number of
simultaneous connections limited to %d
to avoid server overload", (int)
opt->maxsoc); } if (opt->maxrate <= 0
|| opt->maxrate > 250000)
{ opt->maxrate = 250000;
hts_log_print(opt, LOG_WARNING, "*
security warning: maximum bandwidth
limited to %d to avoid server
overload", (int) opt->maxrate); } if
(opt->maxconn <= 0 || opt->maxconn >
5.0) { opt->maxconn = 5.0;
hts_log_print(opt, LOG_WARNING, "*
security warning: maximum number of
connections per second limited to %f to
avoid server overload", (float)
opt->maxconn); } } else
{ hts_log_print(opt, LOG_WARNING, "*
security warning: !!! BYPASSING
SECURITY LIMITS - MONITOR THIS SESSION
WITH EXTREME CARE !!!"); }

(5) (A) In source file htscore.c

  // connect limiter
  if (n > 0 && opt->maxconn > 0 &&
  HTS_STAT.last_connect > 0) { TStamp
  opTime = HTS_STAT.last_request ?
  HTS_STAT.last_request :
  HTS_STAT.last_connect; TStamp cTime =
  mtime_local(); TStamp lap = (cTime -
  opTime); TStamp minLap = (TStamp)
  (1000.0 / opt->maxconn);

    if (lap < minLap) {
      n = 0;
    } else if (minLap != 0) {
      int nMax = (int) (lap / minLap);

      n = min(n, nMax);
    }
  }

  return n;
}

and
(5)(B)   if (from->maxconn > 0)
    to->maxconn = from->maxconn;

(6) HTSEXT_API void qsec2str is a
function I find important, defined in
httrack-library.h and active in
conversion in htslib.c
// idem, plus court (chaine)
HTSEXT_API void qsec2str(char *st,
TStamp t) {
  int j, h, m, s;

(7) There are source files such as
htsftp.c, htscore.c and htslib.c that
set the timestamp via TStamp; TStamp
plays an important role in indirectly
controlling the simultaneous connections.

(8) In source file htsback.c
// clear, or leave for keep-alive
void back_maydeletehttp(httrackp * opt,
cache_back * cache, struct_back *
sback, const int p) { lien_back *const
back = sback->lnk; const int back_max =
sback->count; TStamp lt = 0;

  assertf(p >= 0 && p < back_max);
  if (back[p].r.soc != INVALID_SOCKET) {
    int q;

    if (back[p].r.soc !=
    INVALID_SOCKET /* security check */
    && back[p].r.statuscode >= 0    /*
    no timeout errors & co */ &&
    back[p].r.keep_alive_trailers ==
    0   /* not yet supported (chunk
    trailers) */ /* Socket not in I/O
    error status */
    && !back[p].r.is_file
    && !check_sockerror(back[p].r.soc) /*
    Keep-alive authorized by user */
    && !opt->nokeepalive /* Socket
    currently is keep-alive! */ &&
    back[p].r.keep_alive /* Remaining
    authorized requests */ &&
    back[p].r.keep_alive_max > 1 /*
    Known keep-alive start (security)
    */ && back[p].ka_time_start /*
    We're on time */ && (lt =
    time_local()) <
    back[p].ka_time_start +
    back[p].r.keep_alive_t /*
    Connection delay must not exceed
    keep-alive timeout */ &&
    (opt->maxconn <= 0 ||
    (back[p].r.keep_alive_t > (1.0 /
    opt->maxconn))) /* Available slot
    in backing */ && (q =
    back_search(opt, sback)) >= 0)
    { lien_back tmp;

      strcpybuff(tmp.url_adr,
      back[p].url_adr);
      tmp.ka_time_start =
      back[p].ka_time_start;
      deletehttp(&back[q].r);   //
      security check
      back_connxfr(&back[p].r,
      &back[q].r);     // transfer live
      connection settings from p to q
      back[q].ka_time_start =
      back[p].ka_time_start;    //
      refresh back[p].r.soc =
      INVALID_SOCKET;
      strcpybuff(back[q].url_adr,
      tmp.url_adr); // address
      back[q].ka_time_start =
      tmp.ka_time_start; back[q].status
      = STATUS_ALIVE;    // alive &
      waiting
      assertf(back[q].ka_time_start !=
      0); hts_log_print(opt, LOG_DEBUG,
      "(Keep-Alive): successfully
      preserved #%d (%s)",
      back[q].r.debugid,
      back[q].url_adr); } else
      { deletehttp(&back[p].r);
      back[p].r.soc =
      INVALID_SOCKET; } } }

(9) In the source file htsparse.c

      // autres occupations de HTTrack:
      statistiques, boucle d'attente,
      etc. if ((opt->makestat) ||
      (opt->maketrack)) { TStamp l =
      time_local();

        if ((int) (l - makestat_time)
        >= 60) { if (makestat_fp !=
        NULL) { fspc(NULL, makestat_fp,
        "info"); fprintf(makestat_fp,
                    "Rate= %d (/"
        LLintP ") \11NewLinks= %d
        (/%d)" LF, (int)
        ((HTS_STAT.HTS_TOTAL_RECV -
        *stre->makestat_total_) / (l -
        makestat_time)), (LLint)
        HTS_STAT.HTS_TOTAL_RECV, (int)
        opt->lien_tot -
        *stre->makestat_lnk_, (int)
        opt->lien_tot);
        fflush(makestat_fp);
        *stre->makestat_total_ =
        HTS_STAT.HTS_TOTAL_RECV;
        *stre->makestat_lnk_ =
        heap_top_index(); } if
        (stre->maketrack_fp != NULL)
        { int i;

            fspc(NULL,
            stre->maketrack_fp,
            "info");
            fprintf(stre->maketrack_fp,
            LF); for(i = 0; i <
            back_max; i++)
            { back_info(sback, i, 3,
            stre->maketrack_fp); }
            fprintf(stre->maketrack_fp,
            LF);
            fflush(stre->maketrack_fp);

          }
          makestat_time = l;
        }
      }

      /* cancel links */
      {
        int i;
        char *s;

        while((s =
        hts_cancel_file_pop(opt)) !=
        NULL) { if (strnotempty(s))
        { // fichier à canceller for(i
        = 0; i < back_max; i++) { if
        ((back[i].status > 0)) { if
        (strcmp(back[i].url_sav, s) ==
        0) {  // ok trouvé if
        (back[i].status != 1000) { #if
        HTS_DEBUG_CLOSESOCK
        DEBUG_W("user cancel:
        deletehttp\n"); #endif if
        (back[i].r.soc !=
        INVALID_SOCKET)
        deletehttp(&back[i].r);
        back[i].r.soc = INVALID_SOCKET;
        back[i].r.statuscode =
        STATUSCODE_INVALID;
        strcpybuff(back[i].r.msg,
        "Cancelled by User");
        back[i].status = 0; // terminé
        back_set_finished(sback, i); }
        else        // cancel ftp..
        flag à 1 back[i].stop_ftp =
        1; } } } s[0] = '\0'; }
        freet(s); }

        // Transfer rate
        engine_stats();

        // Refresh various stats
        HTS_STAT.stat_nsocket =
        back_nsoc(sback);
        HTS_STAT.stat_errors =
        fspc(opt, NULL, "error");
        HTS_STAT.stat_warnings =
        fspc(opt, NULL, "warning");
        HTS_STAT.stat_infos = fspc(opt,
        NULL, "info"); HTS_STAT.nbk =
        backlinks_done(sback,
        opt->liens, opt->lien_tot,
        ptr); HTS_STAT.nb =
        back_transferred(HTS_STAT.stat_bytes,
        sback);

        if (!RUN_CALLBACK7
            (opt, loop, sback->lnk,
        sback->count, b, ptr,
        opt->lien_tot, (int)
        (time_local() -
        HTS_STAT.stat_timestart),
        &HTS_STAT))
        { hts_log_print(opt, LOG_ERROR,
        "Exit requested by shell or
        user"); *stre->exit_xh_ =
        1;  // exit requested
        XH_uninit; return 0; }

      }

(10) In the source files htsweb.c and
httrack.c

  static TStamp prev_mytime =
  0;        /* ok */ static t_InpInfo
  SInfo;       /* ok */

  //
  TStamp mytime;
  long int rate = 0;

  //

(11) In the file htscore.c, TStamp
controls the connection limit.

(12) In httrack.h, we have:
  TStamp stat_timestart;

(13) In httrack-library.h, there is 
HTSEXT_API TStamp mtime_local(void);

(14) In htslib.h
HTSEXT_API void qsec2str(char *st,
TStamp t);

There are references to TStamp in
 htsparse.h, htsweb.h, htscore.h,
htsback.h, htsopt.h, htsglobal.h, 
...
Maybe, next time? I am getting tired.



More information about the Users mailing list